[llvm] r284939 - [X86][SSE] Add SSE41/AVX1 costs for vector shifts.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 23 09:49:05 PDT 2016


Author: rksimon
Date: Sun Oct 23 11:49:04 2016
New Revision: 284939

URL: http://llvm.org/viewvc/llvm-project?rev=284939&view=rev
Log:
[X86][SSE] Add SSE41/AVX1 costs for vector shifts.

We were defaulting to SSE2 costs, which did not take into account the availability of PBLENDW/PBLENDVB to improve merging of per-element shift results.

Modified:
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
    llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
    llvm/trunk/test/Analysis/CostModel/X86/vshift-shl-cost.ll

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=284939&r1=284938&r2=284939&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Sun Oct 23 11:49:04 2016
@@ -400,6 +400,32 @@ int X86TTIImpl::getArithmeticInstrCost(
       ISD = ISD::MUL;
   }
 
+  static const CostTblEntry SSE41CostTable[] = {
+    { ISD::SHL,  MVT::v16i8,    11 }, // pblendvb sequence.
+    { ISD::SHL,  MVT::v32i8,  2*11 }, // pblendvb sequence.
+    { ISD::SHL,  MVT::v8i16,    14 }, // pblendvb sequence.
+    { ISD::SHL,  MVT::v16i16, 2*14 }, // pblendvb sequence.
+
+    { ISD::SRL,  MVT::v16i8,    12 }, // pblendvb sequence.
+    { ISD::SRL,  MVT::v32i8,  2*12 }, // pblendvb sequence.
+    { ISD::SRL,  MVT::v8i16,    14 }, // pblendvb sequence.
+    { ISD::SRL,  MVT::v16i16, 2*14 }, // pblendvb sequence.
+    { ISD::SRL,  MVT::v4i32,    11 }, // Shift each lane + blend.
+    { ISD::SRL,  MVT::v8i32,  2*11 }, // Shift each lane + blend.
+
+    { ISD::SRA,  MVT::v16i8,    24 }, // pblendvb sequence.
+    { ISD::SRA,  MVT::v32i8,  2*24 }, // pblendvb sequence.
+    { ISD::SRA,  MVT::v8i16,    14 }, // pblendvb sequence.
+    { ISD::SRA,  MVT::v16i16, 2*14 }, // pblendvb sequence.
+    { ISD::SRA,  MVT::v4i32,    12 }, // Shift each lane + blend.
+    { ISD::SRA,  MVT::v8i32,  2*12 }, // Shift each lane + blend.
+  };
+
+  if (ST->hasSSE41()) {
+    if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
+      return LT.first * Entry->Cost;
+  }
+
   static const CostTblEntry SSE2CostTable[] = {
     // We don't correctly identify costs of casts because they are marked as
     // custom.

Modified: llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll?rev=284939&r1=284938&r2=284939&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll Sun Oct 23 11:49:04 2016
@@ -36,8 +36,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i
 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
 ; SSE2: Found an estimated cost of 16 for instruction:   %shift
-; SSE41: Found an estimated cost of 16 for instruction:   %shift
-; AVX: Found an estimated cost of 16 for instruction:   %shift
+; SSE41: Found an estimated cost of 12 for instruction:   %shift
+; AVX: Found an estimated cost of 12 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -48,8 +48,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i
 define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -60,9 +60,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i
 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
-; AVX2: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 14 for instruction:   %shift
+; AVX: Found an estimated cost of 14 for instruction:   %shift
+; AVX2: Found an estimated cost of 14 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = ashr <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -71,8 +71,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i
 define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
-; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 64 for instruction:   %shift
+; SSE41: Found an estimated cost of 28 for instruction:   %shift
+; AVX: Found an estimated cost of 28 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <16 x i16> %a, %b
@@ -82,9 +82,9 @@ define <16 x i16> @var_shift_v16i16(<16
 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
 ; SSE2: Found an estimated cost of 54 for instruction:   %shift
-; SSE41: Found an estimated cost of 54 for instruction:   %shift
-; AVX: Found an estimated cost of 54 for instruction:   %shift
-; AVX2: Found an estimated cost of 54 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
+; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = ashr <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -93,8 +93,8 @@ define <16 x i8> @var_shift_v16i8(<16 x
 define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
-; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 108 for instruction:   %shift
+; SSE41: Found an estimated cost of 48 for instruction:   %shift
+; AVX: Found an estimated cost of 48 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, %b
@@ -132,8 +132,8 @@ define <4 x i64> @splatvar_shift_v4i64(<
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
 ; SSE2: Found an estimated cost of 16 for instruction:   %shift
-; SSE41: Found an estimated cost of 16 for instruction:   %shift
-; AVX: Found an estimated cost of 16 for instruction:   %shift
+; SSE41: Found an estimated cost of 12 for instruction:   %shift
+; AVX: Found an estimated cost of 12 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -145,8 +145,8 @@ define <4 x i32> @splatvar_shift_v4i32(<
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -158,9 +158,9 @@ define <8 x i32> @splatvar_shift_v8i32(<
 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
-; AVX2: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 14 for instruction:   %shift
+; AVX: Found an estimated cost of 14 for instruction:   %shift
+; AVX2: Found an estimated cost of 14 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
   %shift = ashr <8 x i16> %a, %splat
@@ -170,8 +170,8 @@ define <8 x i16> @splatvar_shift_v8i16(<
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
-; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 64 for instruction:   %shift
+; SSE41: Found an estimated cost of 28 for instruction:   %shift
+; AVX: Found an estimated cost of 28 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -182,9 +182,9 @@ define <16 x i16> @splatvar_shift_v16i16
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
 ; SSE2: Found an estimated cost of 54 for instruction:   %shift
-; SSE41: Found an estimated cost of 54 for instruction:   %shift
-; AVX: Found an estimated cost of 54 for instruction:   %shift
-; AVX2: Found an estimated cost of 54 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
+; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
   %shift = ashr <16 x i8> %a, %splat
@@ -194,8 +194,8 @@ define <16 x i8> @splatvar_shift_v16i8(<
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
-; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 108 for instruction:   %shift
+; SSE41: Found an estimated cost of 48 for instruction:   %shift
+; AVX: Found an estimated cost of 48 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -232,8 +232,8 @@ define <4 x i64> @constant_shift_v4i64(<
 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
 ; SSE2: Found an estimated cost of 16 for instruction:   %shift
-; SSE41: Found an estimated cost of 16 for instruction:   %shift
-; AVX: Found an estimated cost of 16 for instruction:   %shift
+; SSE41: Found an estimated cost of 12 for instruction:   %shift
+; AVX: Found an estimated cost of 12 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -244,8 +244,8 @@ define <4 x i32> @constant_shift_v4i32(<
 define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -256,9 +256,9 @@ define <8 x i32> @constant_shift_v8i32(<
 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
-; AVX2: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 14 for instruction:   %shift
+; AVX: Found an estimated cost of 14 for instruction:   %shift
+; AVX2: Found an estimated cost of 14 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
@@ -267,8 +267,8 @@ define <8 x i16> @constant_shift_v8i16(<
 define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
-; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 64 for instruction:   %shift
+; SSE41: Found an estimated cost of 28 for instruction:   %shift
+; AVX: Found an estimated cost of 28 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -278,9 +278,9 @@ define <16 x i16> @constant_shift_v16i16
 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
 ; SSE2: Found an estimated cost of 54 for instruction:   %shift
-; SSE41: Found an estimated cost of 54 for instruction:   %shift
-; AVX: Found an estimated cost of 54 for instruction:   %shift
-; AVX2: Found an estimated cost of 54 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
+; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -289,8 +289,8 @@ define <16 x i8> @constant_shift_v16i8(<
 define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
-; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 108 for instruction:   %shift
+; SSE41: Found an estimated cost of 48 for instruction:   %shift
+; AVX: Found an estimated cost of 48 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>

Modified: llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll?rev=284939&r1=284938&r2=284939&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll Sun Oct 23 11:49:04 2016
@@ -38,8 +38,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i
 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
 ; SSE2: Found an estimated cost of 16 for instruction:   %shift
-; SSE41: Found an estimated cost of 16 for instruction:   %shift
-; AVX: Found an estimated cost of 16 for instruction:   %shift
+; SSE41: Found an estimated cost of 11 for instruction:   %shift
+; AVX: Found an estimated cost of 11 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -50,8 +50,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i
 define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 22 for instruction:   %shift
+; AVX: Found an estimated cost of 22 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -62,9 +62,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i
 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
-; AVX2: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 14 for instruction:   %shift
+; AVX: Found an estimated cost of 14 for instruction:   %shift
+; AVX2: Found an estimated cost of 14 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = lshr <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -73,8 +73,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i
 define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
-; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 64 for instruction:   %shift
+; SSE41: Found an estimated cost of 28 for instruction:   %shift
+; AVX: Found an estimated cost of 28 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <16 x i16> %a, %b
@@ -84,9 +84,9 @@ define <16 x i16> @var_shift_v16i16(<16
 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
-; SSE41: Found an estimated cost of 26 for instruction:   %shift
-; AVX: Found an estimated cost of 26 for instruction:   %shift
-; AVX2: Found an estimated cost of 26 for instruction:   %shift
+; SSE41: Found an estimated cost of 12 for instruction:   %shift
+; AVX: Found an estimated cost of 12 for instruction:   %shift
+; AVX2: Found an estimated cost of 12 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = lshr <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -95,8 +95,8 @@ define <16 x i8> @var_shift_v16i8(<16 x
 define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
-; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 52 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <32 x i8> %a, %b
@@ -136,8 +136,8 @@ define <4 x i64> @splatvar_shift_v4i64(<
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
 ; SSE2: Found an estimated cost of 16 for instruction:   %shift
-; SSE41: Found an estimated cost of 16 for instruction:   %shift
-; AVX: Found an estimated cost of 16 for instruction:   %shift
+; SSE41: Found an estimated cost of 11 for instruction:   %shift
+; AVX: Found an estimated cost of 11 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -149,8 +149,8 @@ define <4 x i32> @splatvar_shift_v4i32(<
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 22 for instruction:   %shift
+; AVX: Found an estimated cost of 22 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -162,9 +162,9 @@ define <8 x i32> @splatvar_shift_v8i32(<
 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
-; AVX2: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 14 for instruction:   %shift
+; AVX: Found an estimated cost of 14 for instruction:   %shift
+; AVX2: Found an estimated cost of 14 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
   %shift = lshr <8 x i16> %a, %splat
@@ -174,8 +174,8 @@ define <8 x i16> @splatvar_shift_v8i16(<
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
-; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 64 for instruction:   %shift
+; SSE41: Found an estimated cost of 28 for instruction:   %shift
+; AVX: Found an estimated cost of 28 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -186,9 +186,9 @@ define <16 x i16> @splatvar_shift_v16i16
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
-; SSE41: Found an estimated cost of 26 for instruction:   %shift
-; AVX: Found an estimated cost of 26 for instruction:   %shift
-; AVX2: Found an estimated cost of 26 for instruction:   %shift
+; SSE41: Found an estimated cost of 12 for instruction:   %shift
+; AVX: Found an estimated cost of 12 for instruction:   %shift
+; AVX2: Found an estimated cost of 12 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
   %shift = lshr <16 x i8> %a, %splat
@@ -198,8 +198,8 @@ define <16 x i8> @splatvar_shift_v16i8(<
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
-; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 52 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -238,8 +238,8 @@ define <4 x i64> @constant_shift_v4i64(<
 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
 ; SSE2: Found an estimated cost of 16 for instruction:   %shift
-; SSE41: Found an estimated cost of 16 for instruction:   %shift
-; AVX: Found an estimated cost of 16 for instruction:   %shift
+; SSE41: Found an estimated cost of 11 for instruction:   %shift
+; AVX: Found an estimated cost of 11 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -250,8 +250,8 @@ define <4 x i32> @constant_shift_v4i32(<
 define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 22 for instruction:   %shift
+; AVX: Found an estimated cost of 22 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -262,9 +262,9 @@ define <8 x i32> @constant_shift_v8i32(<
 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
-; AVX2: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 14 for instruction:   %shift
+; AVX: Found an estimated cost of 14 for instruction:   %shift
+; AVX2: Found an estimated cost of 14 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
@@ -273,8 +273,8 @@ define <8 x i16> @constant_shift_v8i16(<
 define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
-; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 64 for instruction:   %shift
+; SSE41: Found an estimated cost of 28 for instruction:   %shift
+; AVX: Found an estimated cost of 28 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -284,9 +284,9 @@ define <16 x i16> @constant_shift_v16i16
 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
-; SSE41: Found an estimated cost of 26 for instruction:   %shift
-; AVX: Found an estimated cost of 26 for instruction:   %shift
-; AVX2: Found an estimated cost of 26 for instruction:   %shift
+; SSE41: Found an estimated cost of 12 for instruction:   %shift
+; AVX: Found an estimated cost of 12 for instruction:   %shift
+; AVX2: Found an estimated cost of 12 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -295,8 +295,8 @@ define <16 x i8> @constant_shift_v16i8(<
 define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
-; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 52 for instruction:   %shift
+; SSE41: Found an estimated cost of 24 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>

Modified: llvm/trunk/test/Analysis/CostModel/X86/vshift-shl-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/vshift-shl-cost.ll?rev=284939&r1=284938&r2=284939&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/vshift-shl-cost.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/vshift-shl-cost.ll Sun Oct 23 11:49:04 2016
@@ -63,9 +63,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i
 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
-; AVX2: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 14 for instruction:   %shift
+; AVX: Found an estimated cost of 14 for instruction:   %shift
+; AVX2: Found an estimated cost of 14 for instruction:   %shift
 ; XOP: Found an estimated cost of 1 for instruction:   %shift
   %shift = shl <8 x i16> %a, %b
   ret <8 x i16> %shift
@@ -74,8 +74,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i
 define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
-; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 64 for instruction:   %shift
+; SSE41: Found an estimated cost of 28 for instruction:   %shift
+; AVX: Found an estimated cost of 28 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <16 x i16> %a, %b
@@ -85,9 +85,9 @@ define <16 x i16> @var_shift_v16i16(<16
 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
-; SSE41: Found an estimated cost of 26 for instruction:   %shift
-; AVX: Found an estimated cost of 26 for instruction:   %shift
-; AVX2: Found an estimated cost of 26 for instruction:   %shift
+; SSE41: Found an estimated cost of 11 for instruction:   %shift
+; AVX: Found an estimated cost of 11 for instruction:   %shift
+; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 1 for instruction:   %shift
   %shift = shl <16 x i8> %a, %b
   ret <16 x i8> %shift
@@ -96,8 +96,8 @@ define <16 x i8> @var_shift_v16i8(<16 x
 define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
-; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 52 for instruction:   %shift
+; SSE41: Found an estimated cost of 22 for instruction:   %shift
+; AVX: Found an estimated cost of 22 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <32 x i8> %a, %b
@@ -163,9 +163,9 @@ define <8 x i32> @splatvar_shift_v8i32(<
 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
-; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 32 for instruction:   %shift
-; AVX2: Found an estimated cost of 32 for instruction:   %shift
+; SSE41: Found an estimated cost of 14 for instruction:   %shift
+; AVX: Found an estimated cost of 14 for instruction:   %shift
+; AVX2: Found an estimated cost of 14 for instruction:   %shift
 ; XOP: Found an estimated cost of 1 for instruction:   %shift
   %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
   %shift = shl <8 x i16> %a, %splat
@@ -175,8 +175,8 @@ define <8 x i16> @splatvar_shift_v8i16(<
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
-; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 64 for instruction:   %shift
+; SSE41: Found an estimated cost of 28 for instruction:   %shift
+; AVX: Found an estimated cost of 28 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -187,9 +187,9 @@ define <16 x i16> @splatvar_shift_v16i16
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
-; SSE41: Found an estimated cost of 26 for instruction:   %shift
-; AVX: Found an estimated cost of 26 for instruction:   %shift
-; AVX2: Found an estimated cost of 26 for instruction:   %shift
+; SSE41: Found an estimated cost of 11 for instruction:   %shift
+; AVX: Found an estimated cost of 11 for instruction:   %shift
+; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 1 for instruction:   %shift
   %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
   %shift = shl <16 x i8> %a, %splat
@@ -199,8 +199,8 @@ define <16 x i8> @splatvar_shift_v16i8(<
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
-; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 52 for instruction:   %shift
+; SSE41: Found an estimated cost of 22 for instruction:   %shift
+; AVX: Found an estimated cost of 22 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -286,9 +286,9 @@ define <16 x i16> @constant_shift_v16i16
 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
-; SSE41: Found an estimated cost of 26 for instruction:   %shift
-; AVX: Found an estimated cost of 26 for instruction:   %shift
-; AVX2: Found an estimated cost of 26 for instruction:   %shift
+; SSE41: Found an estimated cost of 11 for instruction:   %shift
+; AVX: Found an estimated cost of 11 for instruction:   %shift
+; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 1 for instruction:   %shift
   %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
@@ -297,8 +297,8 @@ define <16 x i8> @constant_shift_v16i8(<
 define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
-; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 52 for instruction:   %shift
+; SSE41: Found an estimated cost of 22 for instruction:   %shift
+; AVX: Found an estimated cost of 22 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>




More information about the llvm-commits mailing list