[llvm] [SLP][TLI] Add cost model/vectorization support for [u|s]cmp (PR #106747)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 08:26:40 PDT 2024
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/106747
This patch adds cost model/vectorization support to vectorize [u|s]cmp intrinsic calls.
>From e1433b33cb9a5577a2e2b467dd953436b7efe0f3 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 30 Aug 2024 23:24:05 +0800
Subject: [PATCH] [SLP][TLI] Add vectorization support for [u|s]cmp
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 37 +
llvm/lib/Analysis/VectorUtils.cpp | 4 +
.../Analysis/CostModel/X86/icmp-codesize.ll | 572 ++++++-----
.../Analysis/CostModel/X86/icmp-latency.ll | 686 +++++++------
.../CostModel/X86/icmp-sizelatency.ll | 572 ++++++-----
llvm/test/Analysis/CostModel/X86/icmp.ll | 960 +++++++++---------
.../SLPVectorizer/X86/arith-scmp.ll | 646 ++++++++++++
.../SLPVectorizer/X86/arith-ucmp.ll | 681 +++++++++++++
8 files changed, 2878 insertions(+), 1280 deletions(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/arith-scmp.ll
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/arith-ucmp.ll
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 47323ca067a435..f4bddcfc815ff7 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2196,6 +2196,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::bitreverse:
ISD = ISD::BITREVERSE;
break;
+ case Intrinsic::ucmp:
+ ISD = ISD::UCMP;
+ break;
+ case Intrinsic::scmp:
+ ISD = ISD::SCMP;
+ break;
}
auto *ST = dyn_cast<StructType>(RetTy);
@@ -2433,6 +2439,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
return Cost;
}
+ case Intrinsic::ucmp:
+ case Intrinsic::scmp: {
+ InstructionCost Cost = 0;
+ bool IsSigned = IID == Intrinsic::scmp;
+ ICmpInst::Predicate GTPred =
+ IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;
+ ICmpInst::Predicate LTPred =
+ IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
+ Type *CmpTy = Tys[0];
+
+ if (TLI->shouldExpandCmpUsingSelects()) {
+ // x < y ? -1 : (x > y ? 1 : 0)
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, RetTy,
+ GTPred, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, RetTy,
+ LTPred, CostKind);
+ } else {
+ // zext(x > y) - zext(x < y)
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
+ GTPred, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
+ LTPred, CostKind);
+ Cost +=
+ 2 * thisT()->getCastInstrCost(CastInst::ZExt, RetTy, CondTy,
+ TTI::CastContextHint::None, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
+ CostKind);
+ }
+ return Cost;
+ }
default:
break;
}
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index cc742ab35f4498..0c806820a31e17 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -97,6 +97,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fptoui_sat:
case Intrinsic::lrint:
case Intrinsic::llrint:
+ case Intrinsic::ucmp:
+ case Intrinsic::scmp:
return true;
default:
return false;
@@ -132,6 +134,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
case Intrinsic::fptoui_sat:
case Intrinsic::lrint:
case Intrinsic::llrint:
+ case Intrinsic::ucmp:
+ case Intrinsic::scmp:
return OpdIdx == -1 || OpdIdx == 0;
case Intrinsic::is_fpclass:
return OpdIdx == 0;
diff --git a/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll b/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll
index 2dc6737a3d8a07..b8d0e04daccc84 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll
@@ -2090,142 +2090,188 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
}
define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'scmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'scmp_int'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; AVX1-LABEL: 'scmp_int'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'scmp_int'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'scmp_int'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'scmp_int'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX1-LABEL: 'scmp_int'
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX2-LABEL: 'scmp_int'
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
@@ -2256,142 +2302,188 @@ define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
}
define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'ucmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'ucmp_int'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; AVX1-LABEL: 'ucmp_int'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'ucmp_int'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'ucmp_int'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'ucmp_int'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX1-LABEL: 'ucmp_int'
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX2-LABEL: 'ucmp_int'
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
diff --git a/llvm/test/Analysis/CostModel/X86/icmp-latency.ll b/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
index 726a6dd782a4f7..fb7b7f7fcb0613 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
@@ -2320,188 +2320,211 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
}
define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'scmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'scmp_int'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX1-LABEL: 'scmp_int'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'scmp_int'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'scmp_int'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'scmp_int'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX1-LABEL: 'scmp_int'
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX2-LABEL: 'scmp_int'
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLM-LABEL: 'scmp_int'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
@@ -2532,188 +2555,211 @@ define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
}
define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'ucmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'ucmp_int'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX1-LABEL: 'ucmp_int'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'ucmp_int'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'ucmp_int'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'ucmp_int'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX1-LABEL: 'ucmp_int'
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX2-LABEL: 'ucmp_int'
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLM-LABEL: 'ucmp_int'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
diff --git a/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll
index 0deaad5991fb2f..005d0b2ec45bec 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll
@@ -2090,142 +2090,188 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
}
define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'scmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'scmp_int'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; AVX1-LABEL: 'scmp_int'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'scmp_int'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'scmp_int'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'scmp_int'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX1-LABEL: 'scmp_int'
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX2-LABEL: 'scmp_int'
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
@@ -2256,142 +2302,188 @@ define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
}
define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'ucmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'ucmp_int'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; AVX1-LABEL: 'ucmp_int'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'ucmp_int'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'ucmp_int'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'ucmp_int'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX1-LABEL: 'ucmp_int'
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; XOPAVX2-LABEL: 'ucmp_int'
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
diff --git a/llvm/test/Analysis/CostModel/X86/icmp.ll b/llvm/test/Analysis/CostModel/X86/icmp.ll
index 599895c2b5705a..0c272d93d32f13 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp.ll
@@ -3057,279 +3057,279 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
; SSE2-LABEL: 'scmp_int'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'scmp_int'
-; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'scmp_int'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'scmp_int'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'scmp_int'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'scmp_int'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'scmp_int'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'scmp_int'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'scmp_int'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; XOPAVX1-LABEL: 'scmp_int'
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; XOPAVX2-LABEL: 'scmp_int'
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'scmp_int'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
@@ -3361,279 +3361,279 @@ define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
; SSE2-LABEL: 'ucmp_int'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'ucmp_int'
-; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'ucmp_int'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'ucmp_int'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'ucmp_int'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'ucmp_int'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'ucmp_int'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'ucmp_int'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'ucmp_int'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; XOPAVX1-LABEL: 'ucmp_int'
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; XOPAVX2-LABEL: 'ucmp_int'
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'ucmp_int'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-scmp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-scmp.ll
new file mode 100644
index 00000000000000..6e6bd8e526c605
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-scmp.ll
@@ -0,0 +1,646 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+
+ at a64 = common global [8 x i64] zeroinitializer, align 64
+ at b64 = common global [8 x i64] zeroinitializer, align 64
+ at c64 = common global [8 x i64] zeroinitializer, align 64
+ at a32 = common global [16 x i32] zeroinitializer, align 64
+ at b32 = common global [16 x i32] zeroinitializer, align 64
+ at c32 = common global [16 x i32] zeroinitializer, align 64
+ at a16 = common global [32 x i16] zeroinitializer, align 64
+ at b16 = common global [32 x i16] zeroinitializer, align 64
+ at c16 = common global [32 x i16] zeroinitializer, align 64
+ at a8 = common global [64 x i8] zeroinitializer, align 64
+ at b8 = common global [64 x i8] zeroinitializer, align 64
+ at c8 = common global [64 x i8] zeroinitializer, align 64
+
+define void @scmp_v8i64() {
+; AVX-LABEL: @scmp_v8i64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
+; AVX-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
+; AVX-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; AVX-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
+; AVX-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @scmp_v8i64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
+; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
+; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8
+; AVX512-NEXT: ret void
+;
+ %a0 = load i64, ptr @a64, align 8
+ %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+ %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+ %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+ %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+ %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+ %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+ %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+ %b0 = load i64, ptr @b64, align 8
+ %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+ %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+ %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+ %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+ %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+ %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+ %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
+ %r0 = call i64 @llvm.scmp.i64.i64(i64 %a0, i64 %b0)
+ %r1 = call i64 @llvm.scmp.i64.i64(i64 %a1, i64 %b1)
+ %r2 = call i64 @llvm.scmp.i64.i64(i64 %a2, i64 %b2)
+ %r3 = call i64 @llvm.scmp.i64.i64(i64 %a3, i64 %b3)
+ %r4 = call i64 @llvm.scmp.i64.i64(i64 %a4, i64 %b4)
+ %r5 = call i64 @llvm.scmp.i64.i64(i64 %a5, i64 %b5)
+ %r6 = call i64 @llvm.scmp.i64.i64(i64 %a6, i64 %b6)
+ %r7 = call i64 @llvm.scmp.i64.i64(i64 %a7, i64 %b7)
+ store i64 %r0, ptr @c64, align 8
+ store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+ store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+ store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+ store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+ store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+ store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+ store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
+ ret void
+}
+
+define void @scmp_v16i32() {
+; SSE-LABEL: @scmp_v16i32(
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
+; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
+; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
+; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
+; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @scmp_v16i32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
+; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
+; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @scmp_v16i32(
+; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
+; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
+; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4
+; AVX512-NEXT: ret void
+;
+ %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+ %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+ %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+ %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+ %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+ %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+ %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+ %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+ %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+ %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+ %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+ %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+ %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+ %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+ %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+ %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+ %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+ %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+ %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+ %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+ %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+ %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+ %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+ %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+ %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+ %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+ %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+ %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+ %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+ %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+ %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+ %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
+ %r0 = call i32 @llvm.scmp.i32.i32(i32 %a0 , i32 %b0 )
+ %r1 = call i32 @llvm.scmp.i32.i32(i32 %a1 , i32 %b1 )
+ %r2 = call i32 @llvm.scmp.i32.i32(i32 %a2 , i32 %b2 )
+ %r3 = call i32 @llvm.scmp.i32.i32(i32 %a3 , i32 %b3 )
+ %r4 = call i32 @llvm.scmp.i32.i32(i32 %a4 , i32 %b4 )
+ %r5 = call i32 @llvm.scmp.i32.i32(i32 %a5 , i32 %b5 )
+ %r6 = call i32 @llvm.scmp.i32.i32(i32 %a6 , i32 %b6 )
+ %r7 = call i32 @llvm.scmp.i32.i32(i32 %a7 , i32 %b7 )
+ %r8 = call i32 @llvm.scmp.i32.i32(i32 %a8 , i32 %b8 )
+ %r9 = call i32 @llvm.scmp.i32.i32(i32 %a9 , i32 %b9 )
+ %r10 = call i32 @llvm.scmp.i32.i32(i32 %a10, i32 %b10)
+ %r11 = call i32 @llvm.scmp.i32.i32(i32 %a11, i32 %b11)
+ %r12 = call i32 @llvm.scmp.i32.i32(i32 %a12, i32 %b12)
+ %r13 = call i32 @llvm.scmp.i32.i32(i32 %a13, i32 %b13)
+ %r14 = call i32 @llvm.scmp.i32.i32(i32 %a14, i32 %b14)
+ %r15 = call i32 @llvm.scmp.i32.i32(i32 %a15, i32 %b15)
+ store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+ store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+ store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+ store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+ store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+ store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+ store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+ store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+ store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+ store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+ store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+ store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+ store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+ store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+ store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+ store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
+ ret void
+}
+
+define void @scmp_v32i16() {
+; SSE-LABEL: @scmp_v32i16(
+; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
+; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; SSE-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
+; SSE-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
+; SSE-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; SSE-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
+; SSE-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @scmp_v32i16(
+; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
+; AVX-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; AVX-NEXT: store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; AVX-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
+; AVX-NEXT: store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @scmp_v32i16(
+; AVX512-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
+; AVX512-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
+; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @c16, align 2
+; AVX512-NEXT: ret void
+;
+ %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+ %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+ %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+ %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+ %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+ %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+ %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+ %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+ %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+ %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+ %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+ %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+ %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+ %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+ %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+ %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+ %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+ %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+ %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+ %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+ %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+ %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+ %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+ %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+ %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+ %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+ %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+ %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+ %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+ %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+ %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+ %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+ %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+ %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+ %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+ %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+ %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+ %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+ %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+ %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+ %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+ %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+ %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+ %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+ %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+ %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+ %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+ %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+ %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+ %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+ %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+ %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+ %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+ %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+ %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+ %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+ %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+ %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+ %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+ %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+ %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+ %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+ %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+ %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
+ %r0 = call i16 @llvm.scmp.i16.i16(i16 %a0 , i16 %b0 )
+ %r1 = call i16 @llvm.scmp.i16.i16(i16 %a1 , i16 %b1 )
+ %r2 = call i16 @llvm.scmp.i16.i16(i16 %a2 , i16 %b2 )
+ %r3 = call i16 @llvm.scmp.i16.i16(i16 %a3 , i16 %b3 )
+ %r4 = call i16 @llvm.scmp.i16.i16(i16 %a4 , i16 %b4 )
+ %r5 = call i16 @llvm.scmp.i16.i16(i16 %a5 , i16 %b5 )
+ %r6 = call i16 @llvm.scmp.i16.i16(i16 %a6 , i16 %b6 )
+ %r7 = call i16 @llvm.scmp.i16.i16(i16 %a7 , i16 %b7 )
+ %r8 = call i16 @llvm.scmp.i16.i16(i16 %a8 , i16 %b8 )
+ %r9 = call i16 @llvm.scmp.i16.i16(i16 %a9 , i16 %b9 )
+ %r10 = call i16 @llvm.scmp.i16.i16(i16 %a10, i16 %b10)
+ %r11 = call i16 @llvm.scmp.i16.i16(i16 %a11, i16 %b11)
+ %r12 = call i16 @llvm.scmp.i16.i16(i16 %a12, i16 %b12)
+ %r13 = call i16 @llvm.scmp.i16.i16(i16 %a13, i16 %b13)
+ %r14 = call i16 @llvm.scmp.i16.i16(i16 %a14, i16 %b14)
+ %r15 = call i16 @llvm.scmp.i16.i16(i16 %a15, i16 %b15)
+ %r16 = call i16 @llvm.scmp.i16.i16(i16 %a16, i16 %b16)
+ %r17 = call i16 @llvm.scmp.i16.i16(i16 %a17, i16 %b17)
+ %r18 = call i16 @llvm.scmp.i16.i16(i16 %a18, i16 %b18)
+ %r19 = call i16 @llvm.scmp.i16.i16(i16 %a19, i16 %b19)
+ %r20 = call i16 @llvm.scmp.i16.i16(i16 %a20, i16 %b20)
+ %r21 = call i16 @llvm.scmp.i16.i16(i16 %a21, i16 %b21)
+ %r22 = call i16 @llvm.scmp.i16.i16(i16 %a22, i16 %b22)
+ %r23 = call i16 @llvm.scmp.i16.i16(i16 %a23, i16 %b23)
+ %r24 = call i16 @llvm.scmp.i16.i16(i16 %a24, i16 %b24)
+ %r25 = call i16 @llvm.scmp.i16.i16(i16 %a25, i16 %b25)
+ %r26 = call i16 @llvm.scmp.i16.i16(i16 %a26, i16 %b26)
+ %r27 = call i16 @llvm.scmp.i16.i16(i16 %a27, i16 %b27)
+ %r28 = call i16 @llvm.scmp.i16.i16(i16 %a28, i16 %b28)
+ %r29 = call i16 @llvm.scmp.i16.i16(i16 %a29, i16 %b29)
+ %r30 = call i16 @llvm.scmp.i16.i16(i16 %a30, i16 %b30)
+ %r31 = call i16 @llvm.scmp.i16.i16(i16 %a31, i16 %b31)
+ store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+ store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+ store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+ store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+ store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+ store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+ store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+ store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+ store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+ store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+ store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+ store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+ store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+ store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+ store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+ store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+ store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+ store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+ store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+ store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+ store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+ store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+ store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+ store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+ store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+ store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+ store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+ store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+ store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+ store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+ store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+ store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
+ ret void
+}
+
+define void @scmp_v64i8() {
+; SSE-LABEL: @scmp_v64i8(
+; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
+; SSE-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; SSE-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
+; SSE-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
+; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
+; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @scmp_v64i8(
+; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
+; AVX-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
+; AVX-NEXT: store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; AVX-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
+; AVX-NEXT: store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @scmp_v64i8(
+; AVX512-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
+; AVX512-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
+; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @c8, align 1
+; AVX512-NEXT: ret void
+;
+ %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+ %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+ %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+ %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+ %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+ %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+ %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+ %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+ %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+ %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+ %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+ %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+ %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+ %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+ %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+ %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+ %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+ %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+ %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+ %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+ %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+ %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+ %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+ %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+ %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+ %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+ %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+ %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+ %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+ %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+ %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+ %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+ %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+ %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+ %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+ %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+ %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+ %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+ %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+ %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+ %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+ %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+ %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+ %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+ %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+ %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+ %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+ %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+ %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+ %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+ %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+ %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+ %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+ %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+ %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+ %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+ %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+ %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+ %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+ %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+ %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+ %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+ %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+ %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+ %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+ %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+ %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+ %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+ %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+ %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+ %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+ %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+ %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+ %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+ %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+ %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+ %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+ %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+ %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+ %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+ %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+ %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+ %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+ %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+ %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+ %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+ %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+ %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+ %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+ %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+ %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+ %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+ %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+ %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+ %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+ %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+ %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+ %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+ %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+ %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+ %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+ %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+ %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+ %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+ %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+ %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+ %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+ %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+ %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+ %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+ %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+ %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+ %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+ %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+ %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+ %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+ %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+ %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+ %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+ %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+ %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+ %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+ %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+ %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+ %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+ %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+ %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+ %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
+ %r0 = call i8 @llvm.scmp.i8.i8(i8 %a0 , i8 %b0 )
+ %r1 = call i8 @llvm.scmp.i8.i8(i8 %a1 , i8 %b1 )
+ %r2 = call i8 @llvm.scmp.i8.i8(i8 %a2 , i8 %b2 )
+ %r3 = call i8 @llvm.scmp.i8.i8(i8 %a3 , i8 %b3 )
+ %r4 = call i8 @llvm.scmp.i8.i8(i8 %a4 , i8 %b4 )
+ %r5 = call i8 @llvm.scmp.i8.i8(i8 %a5 , i8 %b5 )
+ %r6 = call i8 @llvm.scmp.i8.i8(i8 %a6 , i8 %b6 )
+ %r7 = call i8 @llvm.scmp.i8.i8(i8 %a7 , i8 %b7 )
+ %r8 = call i8 @llvm.scmp.i8.i8(i8 %a8 , i8 %b8 )
+ %r9 = call i8 @llvm.scmp.i8.i8(i8 %a9 , i8 %b9 )
+ %r10 = call i8 @llvm.scmp.i8.i8(i8 %a10, i8 %b10)
+ %r11 = call i8 @llvm.scmp.i8.i8(i8 %a11, i8 %b11)
+ %r12 = call i8 @llvm.scmp.i8.i8(i8 %a12, i8 %b12)
+ %r13 = call i8 @llvm.scmp.i8.i8(i8 %a13, i8 %b13)
+ %r14 = call i8 @llvm.scmp.i8.i8(i8 %a14, i8 %b14)
+ %r15 = call i8 @llvm.scmp.i8.i8(i8 %a15, i8 %b15)
+ %r16 = call i8 @llvm.scmp.i8.i8(i8 %a16, i8 %b16)
+ %r17 = call i8 @llvm.scmp.i8.i8(i8 %a17, i8 %b17)
+ %r18 = call i8 @llvm.scmp.i8.i8(i8 %a18, i8 %b18)
+ %r19 = call i8 @llvm.scmp.i8.i8(i8 %a19, i8 %b19)
+ %r20 = call i8 @llvm.scmp.i8.i8(i8 %a20, i8 %b20)
+ %r21 = call i8 @llvm.scmp.i8.i8(i8 %a21, i8 %b21)
+ %r22 = call i8 @llvm.scmp.i8.i8(i8 %a22, i8 %b22)
+ %r23 = call i8 @llvm.scmp.i8.i8(i8 %a23, i8 %b23)
+ %r24 = call i8 @llvm.scmp.i8.i8(i8 %a24, i8 %b24)
+ %r25 = call i8 @llvm.scmp.i8.i8(i8 %a25, i8 %b25)
+ %r26 = call i8 @llvm.scmp.i8.i8(i8 %a26, i8 %b26)
+ %r27 = call i8 @llvm.scmp.i8.i8(i8 %a27, i8 %b27)
+ %r28 = call i8 @llvm.scmp.i8.i8(i8 %a28, i8 %b28)
+ %r29 = call i8 @llvm.scmp.i8.i8(i8 %a29, i8 %b29)
+ %r30 = call i8 @llvm.scmp.i8.i8(i8 %a30, i8 %b30)
+ %r31 = call i8 @llvm.scmp.i8.i8(i8 %a31, i8 %b31)
+ %r32 = call i8 @llvm.scmp.i8.i8(i8 %a32, i8 %b32)
+ %r33 = call i8 @llvm.scmp.i8.i8(i8 %a33, i8 %b33)
+ %r34 = call i8 @llvm.scmp.i8.i8(i8 %a34, i8 %b34)
+ %r35 = call i8 @llvm.scmp.i8.i8(i8 %a35, i8 %b35)
+ %r36 = call i8 @llvm.scmp.i8.i8(i8 %a36, i8 %b36)
+ %r37 = call i8 @llvm.scmp.i8.i8(i8 %a37, i8 %b37)
+ %r38 = call i8 @llvm.scmp.i8.i8(i8 %a38, i8 %b38)
+ %r39 = call i8 @llvm.scmp.i8.i8(i8 %a39, i8 %b39)
+ %r40 = call i8 @llvm.scmp.i8.i8(i8 %a40, i8 %b40)
+ %r41 = call i8 @llvm.scmp.i8.i8(i8 %a41, i8 %b41)
+ %r42 = call i8 @llvm.scmp.i8.i8(i8 %a42, i8 %b42)
+ %r43 = call i8 @llvm.scmp.i8.i8(i8 %a43, i8 %b43)
+ %r44 = call i8 @llvm.scmp.i8.i8(i8 %a44, i8 %b44)
+ %r45 = call i8 @llvm.scmp.i8.i8(i8 %a45, i8 %b45)
+ %r46 = call i8 @llvm.scmp.i8.i8(i8 %a46, i8 %b46)
+ %r47 = call i8 @llvm.scmp.i8.i8(i8 %a47, i8 %b47)
+ %r48 = call i8 @llvm.scmp.i8.i8(i8 %a48, i8 %b48)
+ %r49 = call i8 @llvm.scmp.i8.i8(i8 %a49, i8 %b49)
+ %r50 = call i8 @llvm.scmp.i8.i8(i8 %a50, i8 %b50)
+ %r51 = call i8 @llvm.scmp.i8.i8(i8 %a51, i8 %b51)
+ %r52 = call i8 @llvm.scmp.i8.i8(i8 %a52, i8 %b52)
+ %r53 = call i8 @llvm.scmp.i8.i8(i8 %a53, i8 %b53)
+ %r54 = call i8 @llvm.scmp.i8.i8(i8 %a54, i8 %b54)
+ %r55 = call i8 @llvm.scmp.i8.i8(i8 %a55, i8 %b55)
+ %r56 = call i8 @llvm.scmp.i8.i8(i8 %a56, i8 %b56)
+ %r57 = call i8 @llvm.scmp.i8.i8(i8 %a57, i8 %b57)
+ %r58 = call i8 @llvm.scmp.i8.i8(i8 %a58, i8 %b58)
+ %r59 = call i8 @llvm.scmp.i8.i8(i8 %a59, i8 %b59)
+ %r60 = call i8 @llvm.scmp.i8.i8(i8 %a60, i8 %b60)
+ %r61 = call i8 @llvm.scmp.i8.i8(i8 %a61, i8 %b61)
+ %r62 = call i8 @llvm.scmp.i8.i8(i8 %a62, i8 %b62)
+ %r63 = call i8 @llvm.scmp.i8.i8(i8 %a63, i8 %b63)
+ store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+ store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+ store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+ store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+ store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+ store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+ store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+ store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+ store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+ store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+ store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+ store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+ store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+ store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+ store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+ store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+ store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+ store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+ store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+ store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+ store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+ store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+ store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+ store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+ store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+ store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+ store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+ store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+ store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+ store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+ store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+ store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+ store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+ store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+ store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+ store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+ store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+ store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+ store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+ store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+ store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+ store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+ store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+ store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+ store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+ store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+ store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+ store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+ store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+ store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+ store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+ store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+ store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+ store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+ store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+ store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+ store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+ store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+ store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+ store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+ store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+ store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+ store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+ store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-ucmp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-ucmp.ll
new file mode 100644
index 00000000000000..8413238d969c59
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-ucmp.ll
@@ -0,0 +1,681 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+
+ at a64 = common global [8 x i64] zeroinitializer, align 64
+ at b64 = common global [8 x i64] zeroinitializer, align 64
+ at c64 = common global [8 x i64] zeroinitializer, align 64
+ at a32 = common global [16 x i32] zeroinitializer, align 64
+ at b32 = common global [16 x i32] zeroinitializer, align 64
+ at c32 = common global [16 x i32] zeroinitializer, align 64
+ at a16 = common global [32 x i16] zeroinitializer, align 64
+ at b16 = common global [32 x i16] zeroinitializer, align 64
+ at c16 = common global [32 x i16] zeroinitializer, align 64
+ at a8 = common global [64 x i8] zeroinitializer, align 64
+ at b8 = common global [64 x i8] zeroinitializer, align 64
+ at c8 = common global [64 x i8] zeroinitializer, align 64
+
+define void @ucmp_v8i64() {
+; SSE-LABEL: @ucmp_v8i64(
+; SSE-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
+; SSE-NEXT: [[R0:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A0]], i64 [[B0]])
+; SSE-NEXT: [[R1:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A1]], i64 [[B1]])
+; SSE-NEXT: [[R2:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A2]], i64 [[B2]])
+; SSE-NEXT: [[R3:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A3]], i64 [[B3]])
+; SSE-NEXT: [[R4:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A4]], i64 [[B4]])
+; SSE-NEXT: [[R5:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A5]], i64 [[B5]])
+; SSE-NEXT: [[R6:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A6]], i64 [[B6]])
+; SSE-NEXT: [[R7:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A7]], i64 [[B7]])
+; SSE-NEXT: store i64 [[R0]], ptr @c64, align 8
+; SSE-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ucmp_v8i64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
+; AVX-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
+; AVX-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; AVX-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
+; AVX-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @ucmp_v8i64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
+; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
+; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8
+; AVX512-NEXT: ret void
+;
+ %a0 = load i64, ptr @a64, align 8
+ %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+ %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+ %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+ %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+ %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+ %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+ %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+ %b0 = load i64, ptr @b64, align 8
+ %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+ %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+ %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+ %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+ %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+ %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+ %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
+ %r0 = call i64 @llvm.ucmp.i64.i64(i64 %a0, i64 %b0)
+ %r1 = call i64 @llvm.ucmp.i64.i64(i64 %a1, i64 %b1)
+ %r2 = call i64 @llvm.ucmp.i64.i64(i64 %a2, i64 %b2)
+ %r3 = call i64 @llvm.ucmp.i64.i64(i64 %a3, i64 %b3)
+ %r4 = call i64 @llvm.ucmp.i64.i64(i64 %a4, i64 %b4)
+ %r5 = call i64 @llvm.ucmp.i64.i64(i64 %a5, i64 %b5)
+ %r6 = call i64 @llvm.ucmp.i64.i64(i64 %a6, i64 %b6)
+ %r7 = call i64 @llvm.ucmp.i64.i64(i64 %a7, i64 %b7)
+ store i64 %r0, ptr @c64, align 8
+ store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+ store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+ store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+ store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+ store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+ store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+ store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
+ ret void
+}
+
+define void @ucmp_v16i32() {
+; SSE-LABEL: @ucmp_v16i32(
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
+; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
+; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
+; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
+; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ucmp_v16i32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
+; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
+; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @ucmp_v16i32(
+; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
+; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
+; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4
+; AVX512-NEXT: ret void
+;
+ %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+ %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+ %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+ %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+ %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+ %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+ %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+ %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+ %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+ %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+ %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+ %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+ %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+ %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+ %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+ %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+ %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+ %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+ %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+ %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+ %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+ %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+ %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+ %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+ %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+ %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+ %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+ %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+ %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+ %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+ %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+ %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
+ %r0 = call i32 @llvm.ucmp.i32.i32(i32 %a0 , i32 %b0 )
+ %r1 = call i32 @llvm.ucmp.i32.i32(i32 %a1 , i32 %b1 )
+ %r2 = call i32 @llvm.ucmp.i32.i32(i32 %a2 , i32 %b2 )
+ %r3 = call i32 @llvm.ucmp.i32.i32(i32 %a3 , i32 %b3 )
+ %r4 = call i32 @llvm.ucmp.i32.i32(i32 %a4 , i32 %b4 )
+ %r5 = call i32 @llvm.ucmp.i32.i32(i32 %a5 , i32 %b5 )
+ %r6 = call i32 @llvm.ucmp.i32.i32(i32 %a6 , i32 %b6 )
+ %r7 = call i32 @llvm.ucmp.i32.i32(i32 %a7 , i32 %b7 )
+ %r8 = call i32 @llvm.ucmp.i32.i32(i32 %a8 , i32 %b8 )
+ %r9 = call i32 @llvm.ucmp.i32.i32(i32 %a9 , i32 %b9 )
+ %r10 = call i32 @llvm.ucmp.i32.i32(i32 %a10, i32 %b10)
+ %r11 = call i32 @llvm.ucmp.i32.i32(i32 %a11, i32 %b11)
+ %r12 = call i32 @llvm.ucmp.i32.i32(i32 %a12, i32 %b12)
+ %r13 = call i32 @llvm.ucmp.i32.i32(i32 %a13, i32 %b13)
+ %r14 = call i32 @llvm.ucmp.i32.i32(i32 %a14, i32 %b14)
+ %r15 = call i32 @llvm.ucmp.i32.i32(i32 %a15, i32 %b15)
+ store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+ store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+ store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+ store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+ store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+ store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+ store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+ store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+ store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+ store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+ store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+ store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+ store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+ store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+ store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+ store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
+ ret void
+}
+
+define void @ucmp_v32i16() {
+; SSE-LABEL: @ucmp_v32i16(
+; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
+; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; SSE-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
+; SSE-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
+; SSE-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; SSE-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
+; SSE-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ucmp_v32i16(
+; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
+; AVX-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; AVX-NEXT: store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; AVX-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
+; AVX-NEXT: store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @ucmp_v32i16(
+; AVX512-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
+; AVX512-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
+; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @c16, align 2
+; AVX512-NEXT: ret void
+;
+ %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+ %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+ %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+ %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+ %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+ %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+ %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+ %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+ %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+ %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+ %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+ %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+ %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+ %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+ %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+ %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+ %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+ %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+ %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+ %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+ %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+ %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+ %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+ %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+ %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+ %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+ %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+ %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+ %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+ %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+ %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+ %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+ %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+ %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+ %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+ %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+ %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+ %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+ %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+ %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+ %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+ %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+ %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+ %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+ %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+ %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+ %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+ %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+ %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+ %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+ %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+ %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+ %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+ %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+ %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+ %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+ %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+ %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+ %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+ %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+ %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+ %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+ %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+ %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
+ %r0 = call i16 @llvm.ucmp.i16.i16(i16 %a0 , i16 %b0 )
+ %r1 = call i16 @llvm.ucmp.i16.i16(i16 %a1 , i16 %b1 )
+ %r2 = call i16 @llvm.ucmp.i16.i16(i16 %a2 , i16 %b2 )
+ %r3 = call i16 @llvm.ucmp.i16.i16(i16 %a3 , i16 %b3 )
+ %r4 = call i16 @llvm.ucmp.i16.i16(i16 %a4 , i16 %b4 )
+ %r5 = call i16 @llvm.ucmp.i16.i16(i16 %a5 , i16 %b5 )
+ %r6 = call i16 @llvm.ucmp.i16.i16(i16 %a6 , i16 %b6 )
+ %r7 = call i16 @llvm.ucmp.i16.i16(i16 %a7 , i16 %b7 )
+ %r8 = call i16 @llvm.ucmp.i16.i16(i16 %a8 , i16 %b8 )
+ %r9 = call i16 @llvm.ucmp.i16.i16(i16 %a9 , i16 %b9 )
+ %r10 = call i16 @llvm.ucmp.i16.i16(i16 %a10, i16 %b10)
+ %r11 = call i16 @llvm.ucmp.i16.i16(i16 %a11, i16 %b11)
+ %r12 = call i16 @llvm.ucmp.i16.i16(i16 %a12, i16 %b12)
+ %r13 = call i16 @llvm.ucmp.i16.i16(i16 %a13, i16 %b13)
+ %r14 = call i16 @llvm.ucmp.i16.i16(i16 %a14, i16 %b14)
+ %r15 = call i16 @llvm.ucmp.i16.i16(i16 %a15, i16 %b15)
+ %r16 = call i16 @llvm.ucmp.i16.i16(i16 %a16, i16 %b16)
+ %r17 = call i16 @llvm.ucmp.i16.i16(i16 %a17, i16 %b17)
+ %r18 = call i16 @llvm.ucmp.i16.i16(i16 %a18, i16 %b18)
+ %r19 = call i16 @llvm.ucmp.i16.i16(i16 %a19, i16 %b19)
+ %r20 = call i16 @llvm.ucmp.i16.i16(i16 %a20, i16 %b20)
+ %r21 = call i16 @llvm.ucmp.i16.i16(i16 %a21, i16 %b21)
+ %r22 = call i16 @llvm.ucmp.i16.i16(i16 %a22, i16 %b22)
+ %r23 = call i16 @llvm.ucmp.i16.i16(i16 %a23, i16 %b23)
+ %r24 = call i16 @llvm.ucmp.i16.i16(i16 %a24, i16 %b24)
+ %r25 = call i16 @llvm.ucmp.i16.i16(i16 %a25, i16 %b25)
+ %r26 = call i16 @llvm.ucmp.i16.i16(i16 %a26, i16 %b26)
+ %r27 = call i16 @llvm.ucmp.i16.i16(i16 %a27, i16 %b27)
+ %r28 = call i16 @llvm.ucmp.i16.i16(i16 %a28, i16 %b28)
+ %r29 = call i16 @llvm.ucmp.i16.i16(i16 %a29, i16 %b29)
+ %r30 = call i16 @llvm.ucmp.i16.i16(i16 %a30, i16 %b30)
+ %r31 = call i16 @llvm.ucmp.i16.i16(i16 %a31, i16 %b31)
+ store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+ store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+ store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+ store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+ store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+ store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+ store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+ store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+ store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+ store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+ store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+ store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+ store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+ store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+ store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+ store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+ store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+ store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+ store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+ store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+ store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+ store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+ store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+ store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+ store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+ store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+ store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+ store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+ store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+ store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+ store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+ store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
+ ret void
+}
+
+define void @ucmp_v64i8() {
+; SSE-LABEL: @ucmp_v64i8(
+; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
+; SSE-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; SSE-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
+; SSE-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
+; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
+; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ucmp_v64i8(
+; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
+; AVX-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
+; AVX-NEXT: store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; AVX-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
+; AVX-NEXT: store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @ucmp_v64i8(
+; AVX512-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
+; AVX512-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
+; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @c8, align 1
+; AVX512-NEXT: ret void
+;
+ %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+ %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+ %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+ %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+ %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+ %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+ %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+ %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+ %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+ %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+ %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+ %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+ %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+ %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+ %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+ %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+ %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+ %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+ %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+ %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+ %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+ %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+ %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+ %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+ %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+ %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+ %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+ %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+ %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+ %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+ %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+ %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+ %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+ %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+ %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+ %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+ %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+ %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+ %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+ %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+ %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+ %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+ %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+ %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+ %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+ %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+ %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+ %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+ %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+ %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+ %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+ %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+ %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+ %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+ %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+ %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+ %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+ %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+ %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+ %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+ %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+ %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+ %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+ %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+ %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+ %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+ %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+ %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+ %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+ %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+ %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+ %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+ %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+ %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+ %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+ %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+ %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+ %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+ %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+ %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+ %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+ %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+ %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+ %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+ %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+ %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+ %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+ %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+ %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+ %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+ %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+ %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+ %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+ %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+ %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+ %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+ %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+ %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+ %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+ %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+ %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+ %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+ %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+ %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+ %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+ %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+ %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+ %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+ %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+ %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+ %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+ %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+ %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+ %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+ %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+ %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+ %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+ %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+ %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+ %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+ %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+ %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+ %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+ %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+ %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+ %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+ %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+ %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
+ %r0 = call i8 @llvm.ucmp.i8.i8(i8 %a0 , i8 %b0 )
+ %r1 = call i8 @llvm.ucmp.i8.i8(i8 %a1 , i8 %b1 )
+ %r2 = call i8 @llvm.ucmp.i8.i8(i8 %a2 , i8 %b2 )
+ %r3 = call i8 @llvm.ucmp.i8.i8(i8 %a3 , i8 %b3 )
+ %r4 = call i8 @llvm.ucmp.i8.i8(i8 %a4 , i8 %b4 )
+ %r5 = call i8 @llvm.ucmp.i8.i8(i8 %a5 , i8 %b5 )
+ %r6 = call i8 @llvm.ucmp.i8.i8(i8 %a6 , i8 %b6 )
+ %r7 = call i8 @llvm.ucmp.i8.i8(i8 %a7 , i8 %b7 )
+ %r8 = call i8 @llvm.ucmp.i8.i8(i8 %a8 , i8 %b8 )
+ %r9 = call i8 @llvm.ucmp.i8.i8(i8 %a9 , i8 %b9 )
+ %r10 = call i8 @llvm.ucmp.i8.i8(i8 %a10, i8 %b10)
+ %r11 = call i8 @llvm.ucmp.i8.i8(i8 %a11, i8 %b11)
+ %r12 = call i8 @llvm.ucmp.i8.i8(i8 %a12, i8 %b12)
+ %r13 = call i8 @llvm.ucmp.i8.i8(i8 %a13, i8 %b13)
+ %r14 = call i8 @llvm.ucmp.i8.i8(i8 %a14, i8 %b14)
+ %r15 = call i8 @llvm.ucmp.i8.i8(i8 %a15, i8 %b15)
+ %r16 = call i8 @llvm.ucmp.i8.i8(i8 %a16, i8 %b16)
+ %r17 = call i8 @llvm.ucmp.i8.i8(i8 %a17, i8 %b17)
+ %r18 = call i8 @llvm.ucmp.i8.i8(i8 %a18, i8 %b18)
+ %r19 = call i8 @llvm.ucmp.i8.i8(i8 %a19, i8 %b19)
+ %r20 = call i8 @llvm.ucmp.i8.i8(i8 %a20, i8 %b20)
+ %r21 = call i8 @llvm.ucmp.i8.i8(i8 %a21, i8 %b21)
+ %r22 = call i8 @llvm.ucmp.i8.i8(i8 %a22, i8 %b22)
+ %r23 = call i8 @llvm.ucmp.i8.i8(i8 %a23, i8 %b23)
+ %r24 = call i8 @llvm.ucmp.i8.i8(i8 %a24, i8 %b24)
+ %r25 = call i8 @llvm.ucmp.i8.i8(i8 %a25, i8 %b25)
+ %r26 = call i8 @llvm.ucmp.i8.i8(i8 %a26, i8 %b26)
+ %r27 = call i8 @llvm.ucmp.i8.i8(i8 %a27, i8 %b27)
+ %r28 = call i8 @llvm.ucmp.i8.i8(i8 %a28, i8 %b28)
+ %r29 = call i8 @llvm.ucmp.i8.i8(i8 %a29, i8 %b29)
+ %r30 = call i8 @llvm.ucmp.i8.i8(i8 %a30, i8 %b30)
+ %r31 = call i8 @llvm.ucmp.i8.i8(i8 %a31, i8 %b31)
+ %r32 = call i8 @llvm.ucmp.i8.i8(i8 %a32, i8 %b32)
+ %r33 = call i8 @llvm.ucmp.i8.i8(i8 %a33, i8 %b33)
+ %r34 = call i8 @llvm.ucmp.i8.i8(i8 %a34, i8 %b34)
+ %r35 = call i8 @llvm.ucmp.i8.i8(i8 %a35, i8 %b35)
+ %r36 = call i8 @llvm.ucmp.i8.i8(i8 %a36, i8 %b36)
+ %r37 = call i8 @llvm.ucmp.i8.i8(i8 %a37, i8 %b37)
+ %r38 = call i8 @llvm.ucmp.i8.i8(i8 %a38, i8 %b38)
+ %r39 = call i8 @llvm.ucmp.i8.i8(i8 %a39, i8 %b39)
+ %r40 = call i8 @llvm.ucmp.i8.i8(i8 %a40, i8 %b40)
+ %r41 = call i8 @llvm.ucmp.i8.i8(i8 %a41, i8 %b41)
+ %r42 = call i8 @llvm.ucmp.i8.i8(i8 %a42, i8 %b42)
+ %r43 = call i8 @llvm.ucmp.i8.i8(i8 %a43, i8 %b43)
+ %r44 = call i8 @llvm.ucmp.i8.i8(i8 %a44, i8 %b44)
+ %r45 = call i8 @llvm.ucmp.i8.i8(i8 %a45, i8 %b45)
+ %r46 = call i8 @llvm.ucmp.i8.i8(i8 %a46, i8 %b46)
+ %r47 = call i8 @llvm.ucmp.i8.i8(i8 %a47, i8 %b47)
+ %r48 = call i8 @llvm.ucmp.i8.i8(i8 %a48, i8 %b48)
+ %r49 = call i8 @llvm.ucmp.i8.i8(i8 %a49, i8 %b49)
+ %r50 = call i8 @llvm.ucmp.i8.i8(i8 %a50, i8 %b50)
+ %r51 = call i8 @llvm.ucmp.i8.i8(i8 %a51, i8 %b51)
+ %r52 = call i8 @llvm.ucmp.i8.i8(i8 %a52, i8 %b52)
+ %r53 = call i8 @llvm.ucmp.i8.i8(i8 %a53, i8 %b53)
+ %r54 = call i8 @llvm.ucmp.i8.i8(i8 %a54, i8 %b54)
+ %r55 = call i8 @llvm.ucmp.i8.i8(i8 %a55, i8 %b55)
+ %r56 = call i8 @llvm.ucmp.i8.i8(i8 %a56, i8 %b56)
+ %r57 = call i8 @llvm.ucmp.i8.i8(i8 %a57, i8 %b57)
+ %r58 = call i8 @llvm.ucmp.i8.i8(i8 %a58, i8 %b58)
+ %r59 = call i8 @llvm.ucmp.i8.i8(i8 %a59, i8 %b59)
+ %r60 = call i8 @llvm.ucmp.i8.i8(i8 %a60, i8 %b60)
+ %r61 = call i8 @llvm.ucmp.i8.i8(i8 %a61, i8 %b61)
+ %r62 = call i8 @llvm.ucmp.i8.i8(i8 %a62, i8 %b62)
+ %r63 = call i8 @llvm.ucmp.i8.i8(i8 %a63, i8 %b63)
+ store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+ store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+ store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+ store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+ store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+ store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+ store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+ store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+ store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+ store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+ store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+ store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+ store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+ store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+ store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+ store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+ store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+ store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+ store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+ store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+ store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+ store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+ store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+ store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+ store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+ store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+ store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+ store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+ store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+ store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+ store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+ store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+ store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+ store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+ store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+ store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+ store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+ store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+ store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+ store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+ store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+ store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+ store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+ store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+ store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+ store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+ store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+ store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+ store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+ store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+ store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+ store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+ store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+ store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+ store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+ store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+ store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+ store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+ store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+ store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+ store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+ store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+ store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+ store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
+ ret void
+}
More information about the llvm-commits
mailing list