[llvm] 1680811 - [CostModel][X86] Add BSWAP cost model estimations
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 18 08:18:07 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-18T16:04:59+01:00
New Revision: 16808117c351089cf4282115c0f7c2eb4c2d0016
URL: https://github.com/llvm/llvm-project/commit/16808117c351089cf4282115c0f7c2eb4c2d0016
DIFF: https://github.com/llvm/llvm-project/commit/16808117c351089cf4282115c0f7c2eb4c2d0016.diff
LOG: [CostModel][X86] Add BSWAP cost model estimations
Use a modified version of the D103695 script to determine more accurate throughput/latency/codesize/size-latency cost estimates
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/bswap-codesize.ll
llvm/test/Analysis/CostModel/X86/bswap-latency.ll
llvm/test/Analysis/CostModel/X86/bswap-sizelatency.ll
llvm/test/Analysis/CostModel/X86/bswap.ll
llvm/test/Analysis/CostModel/X86/scalarize.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index aea8de17889b..16de19fdc294 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3400,9 +3400,15 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::v16i8, { 2, 5, 9, 9 } },
{ ISD::BITREVERSE, MVT::v32i8, { 2, 5, 9, 9 } },
{ ISD::BITREVERSE, MVT::v64i8, { 2, 5, 9, 12 } },
- { ISD::BSWAP, MVT::v8i64, { 1 } },
- { ISD::BSWAP, MVT::v16i32, { 1 } },
- { ISD::BSWAP, MVT::v32i16, { 1 } },
+ { ISD::BSWAP, MVT::v2i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i16, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i16, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v32i16, { 1, 1, 1, 2 } },
{ ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
{ ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
{ ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
@@ -3468,9 +3474,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::v16i32, { 9, 13, 20, 20 } },
{ ISD::BITREVERSE, MVT::v32i16, { 9, 13, 20, 20 } },
{ ISD::BITREVERSE, MVT::v64i8, { 6, 11, 17, 17 } },
- { ISD::BSWAP, MVT::v8i64, { 4 } },
- { ISD::BSWAP, MVT::v16i32, { 4 } },
- { ISD::BSWAP, MVT::v32i16, { 4 } },
+ { ISD::BSWAP, MVT::v8i64, { 4, 7, 5, 5 } },
+ { ISD::BSWAP, MVT::v16i32, { 4, 7, 5, 5 } },
+ { ISD::BSWAP, MVT::v32i16, { 4, 7, 5, 5 } },
{ ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
{ ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
{ ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
@@ -3600,9 +3606,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::v16i16, { 5, 11, 10, 17 } },
{ ISD::BITREVERSE, MVT::v16i8, { 3, 6, 9, 9 } },
{ ISD::BITREVERSE, MVT::v32i8, { 4, 5, 9, 15 } },
- { ISD::BSWAP, MVT::v4i64, { 1 } },
- { ISD::BSWAP, MVT::v8i32, { 1 } },
- { ISD::BSWAP, MVT::v16i16, { 1 } },
+ { ISD::BSWAP, MVT::v2i64, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i64, { 1, 3, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i32, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i32, { 1, 3, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i16, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i16, { 1, 3, 1, 2 } },
{ ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
{ ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
{ ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
@@ -3683,9 +3692,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::v8i16, { 8, 13, 10, 16 } },
{ ISD::BITREVERSE, MVT::v32i8, { 13, 15, 17, 26 } }, // 2 x 128-bit Op + extract/insert
{ ISD::BITREVERSE, MVT::v16i8, { 7, 7, 9, 13 } },
- { ISD::BSWAP, MVT::v4i64, { 4 } },
- { ISD::BSWAP, MVT::v8i32, { 4 } },
- { ISD::BSWAP, MVT::v16i16, { 4 } },
+ { ISD::BSWAP, MVT::v4i64, { 5, 7, 5, 10 } },
+ { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 3 } },
+ { ISD::BSWAP, MVT::v8i32, { 5, 7, 5, 10 } },
+ { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 3 } },
+ { ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
+ { ISD::BSWAP, MVT::v8i16, { 2, 2, 1, 3 } },
{ ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
{ ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } }, // 2 x 128-bit Op + extract/insert
@@ -3798,9 +3810,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::v4i32, { 16, 20, 11, 21 } },
{ ISD::BITREVERSE, MVT::v8i16, { 16, 20, 11, 21 } },
{ ISD::BITREVERSE, MVT::v16i8, { 11, 12, 10, 16 } },
- { ISD::BSWAP, MVT::v2i64, { 1 } },
- { ISD::BSWAP, MVT::v4i32, { 1 } },
- { ISD::BSWAP, MVT::v8i16, { 1 } },
+ { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } },
{ ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
{ ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
{ ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
@@ -3823,9 +3835,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::v4i32, { 16, 20, 30, 30 } },
{ ISD::BITREVERSE, MVT::v8i16, { 16, 20, 25, 25 } },
{ ISD::BITREVERSE, MVT::v16i8, { 11, 12, 21, 21 } },
- { ISD::BSWAP, MVT::v2i64, { 7 } },
- { ISD::BSWAP, MVT::v4i32, { 7 } },
- { ISD::BSWAP, MVT::v8i16, { 7 } },
+ { ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
+ { ISD::BSWAP, MVT::v4i32, { 5, 5, 9, 9 } },
+ { ISD::BSWAP, MVT::v8i16, { 5, 5, 4, 5 } },
{ ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
{ ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
{ ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
@@ -3900,7 +3912,7 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ABS, MVT::i64, { 1, 2, 3, 4 } }, // SUB+CMOV
{ ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } },
- { ISD::BSWAP, MVT::i64, { 1 } },
+ { ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } },
{ ISD::CTLZ, MVT::i64, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1, 1, 1, 1 } }, // BSR+XOR
{ ISD::CTTZ, MVT::i64, { 3 } }, // TEST+BSF+CMOV/BRANCH
@@ -3924,8 +3936,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::i32, { 9, 12, 17, 19 } },
{ ISD::BITREVERSE, MVT::i16, { 9, 12, 17, 19 } },
{ ISD::BITREVERSE, MVT::i8, { 7, 9, 13, 14 } },
- { ISD::BSWAP, MVT::i32, { 1 } },
- { ISD::BSWAP, MVT::i16, { 1 } }, // ROL
+ { ISD::BSWAP, MVT::i32, { 1, 1, 1, 1 } },
+ { ISD::BSWAP, MVT::i16, { 1, 2, 1, 2 } }, // ROL
{ ISD::CTLZ, MVT::i32, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
diff --git a/llvm/test/Analysis/CostModel/X86/bswap-codesize.ll b/llvm/test/Analysis/CostModel/X86/bswap-codesize.ll
index 2af8fc4e7eea..21789ca58364 100644
--- a/llvm/test/Analysis/CostModel/X86/bswap-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/bswap-codesize.ll
@@ -15,58 +15,58 @@
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i64'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i64'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I64 = call i64 @llvm.bswap.i64(i64 %a64)
@@ -79,58 +79,58 @@ define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i32'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i32'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i32'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I32 = call i32 @llvm.bswap.i32(i32 %a32)
@@ -143,58 +143,58 @@ define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i16'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i16'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I16 = call i16 @llvm.bswap.i16(i16 %a16)
diff --git a/llvm/test/Analysis/CostModel/X86/bswap-latency.ll b/llvm/test/Analysis/CostModel/X86/bswap-latency.ll
index 00c36d8791d4..370369cf7e93 100644
--- a/llvm/test/Analysis/CostModel/X86/bswap-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/bswap-latency.ll
@@ -14,59 +14,59 @@
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i64'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i64'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i64'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I64 = call i64 @llvm.bswap.i64(i64 %a64)
@@ -79,58 +79,58 @@ define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i32'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i32'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i32'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I32 = call i32 @llvm.bswap.i32(i32 %a32)
@@ -142,59 +142,59 @@ define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I16 = call i16 @llvm.bswap.i16(i16 %a16)
diff --git a/llvm/test/Analysis/CostModel/X86/bswap-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/bswap-sizelatency.ll
index 052483b93769..7c53e250c68f 100644
--- a/llvm/test/Analysis/CostModel/X86/bswap-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/bswap-sizelatency.ll
@@ -14,59 +14,59 @@
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i64'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i64'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i64'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I64 = call i64 @llvm.bswap.i64(i64 %a64)
@@ -79,58 +79,58 @@ define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i32'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i32'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i32'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I32 = call i32 @llvm.bswap.i32(i32 %a32)
@@ -142,59 +142,59 @@ define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I16 = call i16 @llvm.bswap.i16(i16 %a16)
diff --git a/llvm/test/Analysis/CostModel/X86/bswap.ll b/llvm/test/Analysis/CostModel/X86/bswap.ll
index 3458a1d7bdb2..2894c884bd7b 100644
--- a/llvm/test/Analysis/CostModel/X86/bswap.ll
+++ b/llvm/test/Analysis/CostModel/X86/bswap.ll
@@ -15,30 +15,30 @@
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
@@ -79,30 +79,30 @@ define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
@@ -143,30 +143,30 @@ define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i3
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
diff --git a/llvm/test/Analysis/CostModel/X86/scalarize.ll b/llvm/test/Analysis/CostModel/X86/scalarize.ll
index ff24c9be4d8f..8f118bf705e9 100644
--- a/llvm/test/Analysis/CostModel/X86/scalarize.ll
+++ b/llvm/test/Analysis/CostModel/X86/scalarize.ll
@@ -21,11 +21,11 @@ declare %i8 @llvm.cttz.v2i64(%i8)
define void @test_scalarized_intrinsics() {
%r1 = add %i8 undef, undef
-; CHECK32: cost of 1 {{.*}}bswap.v4i32
-; CHECK64: cost of 1 {{.*}}bswap.v4i32
+; CHECK32: cost of 2 {{.*}}bswap.v4i32
+; CHECK64: cost of 2 {{.*}}bswap.v4i32
%r2 = call %i4 @llvm.bswap.v4i32(%i4 undef)
-; CHECK32: cost of 1 {{.*}}bswap.v2i64
-; CHECK64: cost of 1 {{.*}}bswap.v2i64
+; CHECK32: cost of 2 {{.*}}bswap.v2i64
+; CHECK64: cost of 2 {{.*}}bswap.v2i64
%r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
; CHECK32: cost of 11 {{.*}}cttz.v4i32
More information about the llvm-commits
mailing list