[llvm] r368858 - [X86][CostModel] Adjust the costs of ZERO_EXTEND/SIGN_EXTEND with less than 128-bit inputs

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 14 07:52:39 PDT 2019


Author: ctopper
Date: Wed Aug 14 07:52:39 2019
New Revision: 368858

URL: http://llvm.org/viewvc/llvm-project?rev=368858&view=rev
Log:
[X86][CostModel] Adjust the costs of ZERO_EXTEND/SIGN_EXTEND with less than 128-bit inputs

Now that we legalize by widening, the element types here won't change. Previously these were modeled as the elements being widened and then the instruction might become an AND or SHL/ASHR pair. But now they'll become something like a ZERO_EXTEND_VECTOR_INREG/SIGN_EXTEND_VECTOR_INREG.

For AVX2, when the destination type is legal its clear the cost should be 1 since we have extend instructions that can produce 256 bit vectors from less than 128 bit vectors. I'm a little less sure about AVX1 costs, but I think the ones I changed were definitely too high, but they might still be too high.

Differential Revision: https://reviews.llvm.org/D66169

Modified:
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/trunk/test/Analysis/CostModel/X86/cast.ll
    llvm/trunk/test/Analysis/CostModel/X86/extend.ll
    llvm/trunk/test/Analysis/CostModel/X86/min-legal-vector-width.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/sext.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/zext.ll

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=368858&r1=368857&r2=368858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Wed Aug 14 07:52:39 2019
@@ -1314,8 +1314,10 @@ int X86TTIImpl::getCastInstrCost(unsigne
     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8,  1 },
     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
-    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i16,  1 },
+    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i8,   1 },
+    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i8,   1 },
     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i16,  1 },
+    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i16,  1 },
     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i32,  1 },
     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i32,  1 },
 
@@ -1371,14 +1373,14 @@ int X86TTIImpl::getCastInstrCost(unsigne
     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,   3 },
     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,   3 },
     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,   3 },
-    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i8,   3 },
-    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i8,   3 },
-    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i8,   3 },
-    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i8,   3 },
+    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i8,   1 },
+    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i8,   1 },
+    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i8,   1 },
+    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i8,   1 },
     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8,  1 },
     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8,  1 },
-    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16,  3 },
-    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16,  3 },
+    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16,  1 },
+    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16,  1 },
     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16,  1 },
     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16,  1 },
     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i32,  1 },
@@ -1402,13 +1404,13 @@ int X86TTIImpl::getCastInstrCost(unsigne
     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i1,  4 },
     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i1,  7 },
     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i1,  4 },
-    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i8,  6 },
+    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i8,  4 },
     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i8,  4 },
-    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i8,  7 },
+    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i8,  4 },
     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i8,  4 },
     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
-    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16, 6 },
+    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16, 4 },
     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16, 4 },
     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16, 4 },

Modified: llvm/trunk/test/Analysis/CostModel/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/cast.ll?rev=368858&r1=368857&r2=368858&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/cast.ll Wed Aug 14 07:52:39 2019
@@ -139,11 +139,11 @@ define i32 @zext_sext(<8 x i1> %in) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %B = zext <8 x i16> undef to <8 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %D = zext <4 x i32> undef to <4 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
@@ -168,12 +168,12 @@ define i32 @zext_sext(<8 x i1> %in) {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
@@ -198,12 +198,12 @@ define i32 @zext_sext(<8 x i1> %in) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>

Modified: llvm/trunk/test/Analysis/CostModel/X86/extend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/extend.ll?rev=368858&r1=368857&r2=368858&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/extend.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/extend.ll Wed Aug 14 07:52:39 2019
@@ -104,8 +104,8 @@ define i32 @zext_vXi16() {
 ;
 ; AVX2-LABEL: 'zext_vXi16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32>
@@ -114,7 +114,7 @@ define i32 @zext_vXi16() {
 ;
 ; AVX512-LABEL: 'zext_vXi16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32>
@@ -207,11 +207,11 @@ define i32 @zext_vXi8() {
 ;
 ; AVX2-LABEL: 'zext_vXi8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>
@@ -222,11 +222,11 @@ define i32 @zext_vXi8() {
 ;
 ; AVX512F-LABEL: 'zext_vXi8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>
@@ -237,11 +237,11 @@ define i32 @zext_vXi8() {
 ;
 ; AVX512BW-LABEL: 'zext_vXi8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>
@@ -518,8 +518,8 @@ define i32 @sext_vXi16() {
 ;
 ; AVX1-LABEL: 'sext_vXi16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>
@@ -528,8 +528,8 @@ define i32 @sext_vXi16() {
 ;
 ; AVX2-LABEL: 'sext_vXi16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>
@@ -538,7 +538,7 @@ define i32 @sext_vXi16() {
 ;
 ; AVX512-LABEL: 'sext_vXi16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
@@ -548,8 +548,8 @@ define i32 @sext_vXi16() {
 ;
 ; BTVER2-LABEL: 'sext_vXi16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>
@@ -616,11 +616,11 @@ define i32 @sext_vXi8() {
 ;
 ; AVX1-LABEL: 'sext_vXi8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
@@ -631,11 +631,11 @@ define i32 @sext_vXi8() {
 ;
 ; AVX2-LABEL: 'sext_vXi8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
@@ -646,11 +646,11 @@ define i32 @sext_vXi8() {
 ;
 ; AVX512F-LABEL: 'sext_vXi8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
@@ -661,11 +661,11 @@ define i32 @sext_vXi8() {
 ;
 ; AVX512BW-LABEL: 'sext_vXi8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
@@ -676,11 +676,11 @@ define i32 @sext_vXi8() {
 ;
 ; BTVER2-LABEL: 'sext_vXi8'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>

Modified: llvm/trunk/test/Analysis/CostModel/X86/min-legal-vector-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/min-legal-vector-width.ll?rev=368858&r1=368857&r2=368858&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/min-legal-vector-width.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/min-legal-vector-width.ll Wed Aug 14 07:52:39 2019
@@ -5,7 +5,7 @@
 
 define void @zext256() "min-legal-vector-width"="256" {
 ; VEC256-LABEL: 'zext256'
-; VEC256-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %A = zext <8 x i16> undef to <8 x i64>
+; VEC256-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %A = zext <8 x i16> undef to <8 x i64>
 ; VEC256-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>
 ; VEC256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>
 ; VEC256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %D = zext <16 x i16> undef to <16 x i32>
@@ -30,7 +30,7 @@ define void @zext256() "min-legal-vector
 
 define void @zext512() "min-legal-vector-width"="512" {
 ; AVX-LABEL: 'zext512'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %A = zext <8 x i16> undef to <8 x i64>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %A = zext <8 x i16> undef to <8 x i64>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %D = zext <16 x i16> undef to <16 x i32>
@@ -63,8 +63,8 @@ define void @zext512() "min-legal-vector
 
 define void @sext256() "min-legal-vector-width"="256" {
 ; VEC256-LABEL: 'sext256'
-; VEC256-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %A = sext <8 x i8> undef to <8 x i64>
-; VEC256-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %B = sext <8 x i16> undef to <8 x i64>
+; VEC256-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i8> undef to <8 x i64>
+; VEC256-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %B = sext <8 x i16> undef to <8 x i64>
 ; VEC256-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>
 ; VEC256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>
 ; VEC256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %E = sext <16 x i16> undef to <16 x i32>
@@ -72,7 +72,7 @@ define void @sext256() "min-legal-vector
 ; VEC256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; VEC512-LABEL: 'sext256'
-; VEC512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64>
+; VEC512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64>
 ; VEC512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>
 ; VEC512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>
 ; VEC512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>
@@ -91,8 +91,8 @@ define void @sext256() "min-legal-vector
 
 define void @sext512() "min-legal-vector-width"="512" {
 ; AVX-LABEL: 'sext512'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %A = sext <8 x i8> undef to <8 x i64>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %B = sext <8 x i16> undef to <8 x i64>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i8> undef to <8 x i64>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %B = sext <8 x i16> undef to <8 x i64>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %E = sext <16 x i16> undef to <16 x i32>
@@ -100,7 +100,7 @@ define void @sext512() "min-legal-vector
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SKX256-LABEL: 'sext512'
-; SKX256-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64>
+; SKX256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64>
 ; SKX256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>
 ; SKX256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>
 ; SKX256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>
@@ -109,7 +109,7 @@ define void @sext512() "min-legal-vector
 ; SKX256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; VEC512-LABEL: 'sext512'
-; VEC512-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64>
+; VEC512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64>
 ; VEC512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>
 ; VEC512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>
 ; VEC512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll?rev=368858&r1=368857&r2=368858&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll Wed Aug 14 07:52:39 2019
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -basicaa -slp-vectorizer -dce -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -basicaa -slp-vectorizer -dce -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -basicaa -slp-vectorizer -dce -S | FileCheck %s --check-prefixes=CHECK,SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -basicaa -slp-vectorizer -dce -S | FileCheck %s --check-prefixes=CHECK,AVX
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -76,21 +76,30 @@ entry:
 }
 
 define i64 @test_sext_4i16_to_4i64(i64* noalias nocapture %A, i16* noalias nocapture %B) {
-; CHECK-LABEL: @test_sext_4i16_to_4i64(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <2 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 2
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX5]] to <2 x i16>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* [[TMP4]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = sext <2 x i16> [[TMP5]] to <2 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[ARRAYIDX7]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
-; CHECK-NEXT:    ret i64 undef
+; SSE42-LABEL: @test_sext_4i16_to_4i64(
+; SSE42-NEXT:  entry:
+; SSE42-NEXT:    [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <2 x i16>*
+; SSE42-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* [[TMP0]], align 1
+; SSE42-NEXT:    [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i64>
+; SSE42-NEXT:    [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
+; SSE42-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 4
+; SSE42-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 2
+; SSE42-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2
+; SSE42-NEXT:    [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX5]] to <2 x i16>*
+; SSE42-NEXT:    [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* [[TMP4]], align 1
+; SSE42-NEXT:    [[TMP6:%.*]] = sext <2 x i16> [[TMP5]] to <2 x i64>
+; SSE42-NEXT:    [[TMP7:%.*]] = bitcast i64* [[ARRAYIDX7]] to <2 x i64>*
+; SSE42-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; SSE42-NEXT:    ret i64 undef
+;
+; AVX-LABEL: @test_sext_4i16_to_4i64(
+; AVX-NEXT:  entry:
+; AVX-NEXT:    [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <4 x i16>*
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i64>
+; AVX-NEXT:    [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <4 x i64>*
+; AVX-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 4
+; AVX-NEXT:    ret i64 undef
 ;
 entry:
   %0 = load i16, i16* %B, align 1

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/sext.ll?rev=368858&r1=368857&r2=368858&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/sext.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/sext.ll Wed Aug 14 07:52:39 2019
@@ -160,24 +160,58 @@ define <4 x i64> @loadext_4i8_to_4i64(i8
 ; SLM-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
 ; SLM-NEXT:    ret <4 x i64> [[V3]]
 ;
-; AVX-LABEL: @loadext_4i8_to_4i64(
-; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; AVX-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
-; AVX-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
-; AVX-NEXT:    [[I2:%.*]] = load i8, i8* [[P2]], align 1
-; AVX-NEXT:    [[I3:%.*]] = load i8, i8* [[P3]], align 1
-; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
-; AVX-NEXT:    [[X2:%.*]] = sext i8 [[I2]] to i64
-; AVX-NEXT:    [[X3:%.*]] = sext i8 [[I3]] to i64
-; AVX-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; AVX-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
-; AVX-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
-; AVX-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
-; AVX-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
-; AVX-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
-; AVX-NEXT:    ret <4 x i64> [[V3]]
+; AVX1-LABEL: @loadext_4i8_to_4i64(
+; AVX1-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
+; AVX1-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
+; AVX1-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
+; AVX1-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
+; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; AVX1-NEXT:    [[I2:%.*]] = load i8, i8* [[P2]], align 1
+; AVX1-NEXT:    [[I3:%.*]] = load i8, i8* [[P3]], align 1
+; AVX1-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
+; AVX1-NEXT:    [[X2:%.*]] = sext i8 [[I2]] to i64
+; AVX1-NEXT:    [[X3:%.*]] = sext i8 [[I3]] to i64
+; AVX1-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; AVX1-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX1-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; AVX1-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX1-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
+; AVX1-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
+; AVX1-NEXT:    ret <4 x i64> [[V3]]
+;
+; AVX2-LABEL: @loadext_4i8_to_4i64(
+; AVX2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
+; AVX2-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
+; AVX2-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
+; AVX2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX2-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
+; AVX2-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; AVX2-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX2-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; AVX2-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX2-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; AVX2-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
+; AVX2-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; AVX2-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
+; AVX2-NEXT:    ret <4 x i64> [[V3]]
+;
+; AVX512-LABEL: @loadext_4i8_to_4i64(
+; AVX512-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
+; AVX512-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
+; AVX512-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
+; AVX512-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
+; AVX512-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX512-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
+; AVX512-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; AVX512-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX512-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; AVX512-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX512-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; AVX512-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
+; AVX512-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; AVX512-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
+; AVX512-NEXT:    ret <4 x i64> [[V3]]
 ;
   %p1 = getelementptr inbounds i8, i8* %p0, i64 1
   %p2 = getelementptr inbounds i8, i8* %p0, i64 2
@@ -262,125 +296,34 @@ define <8 x i16> @loadext_8i8_to_8i16(i8
 }
 
 define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
-; SSE-LABEL: @loadext_8i8_to_8i32(
-; SSE-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; SSE-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
-; SSE-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
-; SSE-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
-; SSE-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
-; SSE-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
-; SSE-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
-; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
-; SSE-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
-; SSE-NEXT:    [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
-; SSE-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
-; SSE-NEXT:    [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
-; SSE-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
-; SSE-NEXT:    [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
-; SSE-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
-; SSE-NEXT:    [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
-; SSE-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
-; SSE-NEXT:    [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
-; SSE-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
-; SSE-NEXT:    [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
-; SSE-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
-; SSE-NEXT:    [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
-; SSE-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
-; SSE-NEXT:    [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
-; SSE-NEXT:    ret <8 x i32> [[V7]]
-;
-; AVX1-LABEL: @loadext_8i8_to_8i32(
-; AVX1-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; AVX1-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
-; AVX1-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
-; AVX1-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
-; AVX1-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
-; AVX1-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
-; AVX1-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
-; AVX1-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
-; AVX1-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
-; AVX1-NEXT:    [[I4:%.*]] = load i8, i8* [[P4]], align 1
-; AVX1-NEXT:    [[I5:%.*]] = load i8, i8* [[P5]], align 1
-; AVX1-NEXT:    [[I6:%.*]] = load i8, i8* [[P6]], align 1
-; AVX1-NEXT:    [[I7:%.*]] = load i8, i8* [[P7]], align 1
-; AVX1-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
-; AVX1-NEXT:    [[X4:%.*]] = sext i8 [[I4]] to i32
-; AVX1-NEXT:    [[X5:%.*]] = sext i8 [[I5]] to i32
-; AVX1-NEXT:    [[X6:%.*]] = sext i8 [[I6]] to i32
-; AVX1-NEXT:    [[X7:%.*]] = sext i8 [[I7]] to i32
-; AVX1-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
-; AVX1-NEXT:    [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
-; AVX1-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
-; AVX1-NEXT:    [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
-; AVX1-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
-; AVX1-NEXT:    [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
-; AVX1-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
-; AVX1-NEXT:    [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
-; AVX1-NEXT:    [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4
-; AVX1-NEXT:    [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5
-; AVX1-NEXT:    [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6
-; AVX1-NEXT:    [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7
-; AVX1-NEXT:    ret <8 x i32> [[V7]]
-;
-; AVX2-LABEL: @loadext_8i8_to_8i32(
-; AVX2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; AVX2-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
-; AVX2-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
-; AVX2-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
-; AVX2-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
-; AVX2-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
-; AVX2-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
-; AVX2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
-; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
-; AVX2-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
-; AVX2-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
-; AVX2-NEXT:    [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
-; AVX2-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
-; AVX2-NEXT:    [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
-; AVX2-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
-; AVX2-NEXT:    [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
-; AVX2-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
-; AVX2-NEXT:    [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
-; AVX2-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
-; AVX2-NEXT:    [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
-; AVX2-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
-; AVX2-NEXT:    [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
-; AVX2-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
-; AVX2-NEXT:    [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
-; AVX2-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
-; AVX2-NEXT:    [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
-; AVX2-NEXT:    ret <8 x i32> [[V7]]
-;
-; AVX512-LABEL: @loadext_8i8_to_8i32(
-; AVX512-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; AVX512-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
-; AVX512-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
-; AVX512-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
-; AVX512-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
-; AVX512-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
-; AVX512-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
-; AVX512-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
-; AVX512-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
-; AVX512-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
-; AVX512-NEXT:    [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
-; AVX512-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
-; AVX512-NEXT:    [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
-; AVX512-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
-; AVX512-NEXT:    [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
-; AVX512-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
-; AVX512-NEXT:    [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
-; AVX512-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
-; AVX512-NEXT:    [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
-; AVX512-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
-; AVX512-NEXT:    [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
-; AVX512-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
-; AVX512-NEXT:    [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
-; AVX512-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
-; AVX512-NEXT:    [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
-; AVX512-NEXT:    ret <8 x i32> [[V7]]
+; CHECK-LABEL: @loadext_8i8_to_8i32(
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
+; CHECK-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
+; CHECK-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
+; CHECK-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
+; CHECK-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
+; CHECK-NEXT:    [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
+; CHECK-NEXT:    [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
+; CHECK-NEXT:    [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
+; CHECK-NEXT:    [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
+; CHECK-NEXT:    [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
+; CHECK-NEXT:    [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
+; CHECK-NEXT:    [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
+; CHECK-NEXT:    [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
+; CHECK-NEXT:    ret <8 x i32> [[V7]]
 ;
   %p1 = getelementptr inbounds i8, i8* %p0, i64 1
   %p2 = getelementptr inbounds i8, i8* %p0, i64 2
@@ -655,24 +598,58 @@ define <4 x i64> @loadext_4i16_to_4i64(i
 ; SLM-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
 ; SLM-NEXT:    ret <4 x i64> [[V3]]
 ;
-; AVX-LABEL: @loadext_4i16_to_4i64(
-; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
-; AVX-NEXT:    [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
-; AVX-NEXT:    [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
-; AVX-NEXT:    [[I2:%.*]] = load i16, i16* [[P2]], align 1
-; AVX-NEXT:    [[I3:%.*]] = load i16, i16* [[P3]], align 1
-; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
-; AVX-NEXT:    [[X2:%.*]] = sext i16 [[I2]] to i64
-; AVX-NEXT:    [[X3:%.*]] = sext i16 [[I3]] to i64
-; AVX-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; AVX-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
-; AVX-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
-; AVX-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
-; AVX-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
-; AVX-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
-; AVX-NEXT:    ret <4 x i64> [[V3]]
+; AVX1-LABEL: @loadext_4i16_to_4i64(
+; AVX1-NEXT:    [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
+; AVX1-NEXT:    [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
+; AVX1-NEXT:    [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
+; AVX1-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
+; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; AVX1-NEXT:    [[I2:%.*]] = load i16, i16* [[P2]], align 1
+; AVX1-NEXT:    [[I3:%.*]] = load i16, i16* [[P3]], align 1
+; AVX1-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
+; AVX1-NEXT:    [[X2:%.*]] = sext i16 [[I2]] to i64
+; AVX1-NEXT:    [[X3:%.*]] = sext i16 [[I3]] to i64
+; AVX1-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; AVX1-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX1-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; AVX1-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX1-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
+; AVX1-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
+; AVX1-NEXT:    ret <4 x i64> [[V3]]
+;
+; AVX2-LABEL: @loadext_4i16_to_4i64(
+; AVX2-NEXT:    [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
+; AVX2-NEXT:    [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
+; AVX2-NEXT:    [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
+; AVX2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX2-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
+; AVX2-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; AVX2-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX2-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; AVX2-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX2-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; AVX2-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
+; AVX2-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; AVX2-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
+; AVX2-NEXT:    ret <4 x i64> [[V3]]
+;
+; AVX512-LABEL: @loadext_4i16_to_4i64(
+; AVX512-NEXT:    [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
+; AVX512-NEXT:    [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
+; AVX512-NEXT:    [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
+; AVX512-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
+; AVX512-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX512-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
+; AVX512-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; AVX512-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX512-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; AVX512-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX512-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; AVX512-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
+; AVX512-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; AVX512-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
+; AVX512-NEXT:    ret <4 x i64> [[V3]]
 ;
   %p1 = getelementptr inbounds i16, i16* %p0, i64 1
   %p2 = getelementptr inbounds i16, i16* %p0, i64 2

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/zext.ll?rev=368858&r1=368857&r2=368858&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/zext.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/zext.ll Wed Aug 14 07:52:39 2019
@@ -125,24 +125,58 @@ define <4 x i64> @loadext_4i8_to_4i64(i8
 ; SLM-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
 ; SLM-NEXT:    ret <4 x i64> [[V3]]
 ;
-; AVX-LABEL: @loadext_4i8_to_4i64(
-; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; AVX-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
-; AVX-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
-; AVX-NEXT:    [[I2:%.*]] = load i8, i8* [[P2]], align 1
-; AVX-NEXT:    [[I3:%.*]] = load i8, i8* [[P3]], align 1
-; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
-; AVX-NEXT:    [[X2:%.*]] = zext i8 [[I2]] to i64
-; AVX-NEXT:    [[X3:%.*]] = zext i8 [[I3]] to i64
-; AVX-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; AVX-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
-; AVX-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
-; AVX-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
-; AVX-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
-; AVX-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
-; AVX-NEXT:    ret <4 x i64> [[V3]]
+; AVX1-LABEL: @loadext_4i8_to_4i64(
+; AVX1-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
+; AVX1-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
+; AVX1-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
+; AVX1-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
+; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; AVX1-NEXT:    [[I2:%.*]] = load i8, i8* [[P2]], align 1
+; AVX1-NEXT:    [[I3:%.*]] = load i8, i8* [[P3]], align 1
+; AVX1-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
+; AVX1-NEXT:    [[X2:%.*]] = zext i8 [[I2]] to i64
+; AVX1-NEXT:    [[X3:%.*]] = zext i8 [[I3]] to i64
+; AVX1-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; AVX1-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX1-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; AVX1-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX1-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
+; AVX1-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
+; AVX1-NEXT:    ret <4 x i64> [[V3]]
+;
+; AVX2-LABEL: @loadext_4i8_to_4i64(
+; AVX2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
+; AVX2-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
+; AVX2-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
+; AVX2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
+; AVX2-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; AVX2-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX2-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; AVX2-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX2-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; AVX2-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
+; AVX2-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; AVX2-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
+; AVX2-NEXT:    ret <4 x i64> [[V3]]
+;
+; AVX512-LABEL: @loadext_4i8_to_4i64(
+; AVX512-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
+; AVX512-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
+; AVX512-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
+; AVX512-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
+; AVX512-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX512-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
+; AVX512-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; AVX512-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX512-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; AVX512-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX512-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; AVX512-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
+; AVX512-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; AVX512-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
+; AVX512-NEXT:    ret <4 x i64> [[V3]]
 ;
   %p1 = getelementptr inbounds i8, i8* %p0, i64 1
   %p2 = getelementptr inbounds i8, i8* %p0, i64 2
@@ -529,24 +563,58 @@ define <4 x i64> @loadext_4i16_to_4i64(i
 ; SLM-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
 ; SLM-NEXT:    ret <4 x i64> [[V3]]
 ;
-; AVX-LABEL: @loadext_4i16_to_4i64(
-; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
-; AVX-NEXT:    [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
-; AVX-NEXT:    [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
-; AVX-NEXT:    [[I2:%.*]] = load i16, i16* [[P2]], align 1
-; AVX-NEXT:    [[I3:%.*]] = load i16, i16* [[P3]], align 1
-; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
-; AVX-NEXT:    [[X2:%.*]] = zext i16 [[I2]] to i64
-; AVX-NEXT:    [[X3:%.*]] = zext i16 [[I3]] to i64
-; AVX-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; AVX-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
-; AVX-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
-; AVX-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
-; AVX-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
-; AVX-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
-; AVX-NEXT:    ret <4 x i64> [[V3]]
+; AVX1-LABEL: @loadext_4i16_to_4i64(
+; AVX1-NEXT:    [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
+; AVX1-NEXT:    [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
+; AVX1-NEXT:    [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
+; AVX1-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
+; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; AVX1-NEXT:    [[I2:%.*]] = load i16, i16* [[P2]], align 1
+; AVX1-NEXT:    [[I3:%.*]] = load i16, i16* [[P3]], align 1
+; AVX1-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
+; AVX1-NEXT:    [[X2:%.*]] = zext i16 [[I2]] to i64
+; AVX1-NEXT:    [[X3:%.*]] = zext i16 [[I3]] to i64
+; AVX1-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; AVX1-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX1-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; AVX1-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX1-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
+; AVX1-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
+; AVX1-NEXT:    ret <4 x i64> [[V3]]
+;
+; AVX2-LABEL: @loadext_4i16_to_4i64(
+; AVX2-NEXT:    [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
+; AVX2-NEXT:    [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
+; AVX2-NEXT:    [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
+; AVX2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
+; AVX2-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; AVX2-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX2-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; AVX2-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX2-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; AVX2-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
+; AVX2-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; AVX2-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
+; AVX2-NEXT:    ret <4 x i64> [[V3]]
+;
+; AVX512-LABEL: @loadext_4i16_to_4i64(
+; AVX512-NEXT:    [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
+; AVX512-NEXT:    [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
+; AVX512-NEXT:    [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
+; AVX512-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
+; AVX512-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX512-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
+; AVX512-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; AVX512-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
+; AVX512-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; AVX512-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
+; AVX512-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; AVX512-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
+; AVX512-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; AVX512-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
+; AVX512-NEXT:    ret <4 x i64> [[V3]]
 ;
   %p1 = getelementptr inbounds i16, i16* %p0, i64 1
   %p2 = getelementptr inbounds i16, i16* %p0, i64 2




More information about the llvm-commits mailing list