[llvm] r335364 - [SLPVectorizer] Support alternate opcodes in tryToVectorizeList

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 22 09:37:34 PDT 2018


Author: rksimon
Date: Fri Jun 22 09:37:34 2018
New Revision: 335364

URL: http://llvm.org/viewvc/llvm-project?rev=335364&view=rev
Log:
[SLPVectorizer] Support alternate opcodes in tryToVectorizeList

Enable tryToVectorizeList to support InstructionsState alternate opcode patterns at a root (build vector etc.) as well as further down the vectorization tree.

NOTE: This patch reduces some of the debug reporting if there are opcode mismatches - I can try to add it back if it proves a problem. But it could get rather messy trying to provide equivalent verbose debug strings via getSameOpcode etc.

Differential Revision: https://reviews.llvm.org/D48488

Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-int.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/resched.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=335364&r1=335363&r2=335364&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Jun 22 09:37:34 2018
@@ -4824,13 +4824,13 @@ bool SLPVectorizerPass::tryToVectorizeLi
   LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
                     << VL.size() << ".\n");
 
-  // Check that all of the parts are scalar instructions of the same type.
-  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
-  if (!I0)
+  // Check that all of the parts are scalar instructions of the same type,
+  // we permit an alternate opcode via InstructionsState.
+  InstructionsState S = getSameOpcode(VL);
+  if (!S.Opcode)
     return false;
 
-  unsigned Opcode0 = I0->getOpcode();
-
+  Instruction *I0 = cast<Instruction>(S.OpValue);
   unsigned Sz = R.getVectorElementSize(I0);
   unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
   unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
@@ -4847,30 +4847,15 @@ bool SLPVectorizerPass::tryToVectorizeLi
   for (Value *V : VL) {
     Type *Ty = V->getType();
     if (!isValidElementType(Ty)) {
-      // NOTE: the following will give user internal llvm type name, which may not be useful
-      R.getORE()->emit([&]() {
-          std::string type_str;
-          llvm::raw_string_ostream rso(type_str);
-          Ty->print(rso);
-          return OptimizationRemarkMissed(
-                     SV_NAME, "UnsupportedType", I0)
-                 << "Cannot SLP vectorize list: type "
-                 << rso.str() + " is unsupported by vectorizer";
-      });
-      return false;
-    }
-    Instruction *Inst = dyn_cast<Instruction>(V);
-
-    if (!Inst)
-      return false;
-    if (Inst->getOpcode() != Opcode0) {
+      // NOTE: the following will give user internal llvm type name, which may
+      // not be useful.
       R.getORE()->emit([&]() {
-          return OptimizationRemarkMissed(
-                     SV_NAME, "InequableTypes", I0)
-                 << "Cannot SLP vectorize list: not all of the "
-                 << "parts of scalar instructions are of the same type: "
-                 << ore::NV("Instruction1Opcode", I0) << " and "
-                 << ore::NV("Instruction2Opcode", Inst);
+        std::string type_str;
+        llvm::raw_string_ostream rso(type_str);
+        Ty->print(rso);
+        return OptimizationRemarkMissed(SV_NAME, "UnsupportedType", I0)
+               << "Cannot SLP vectorize list: type "
+               << rso.str() + " is unsupported by vectorizer";
       });
       return false;
     }

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-fp.ll?rev=335364&r1=335363&r2=335364&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-fp.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-fp.ll Fri Jun 22 09:37:34 2018
@@ -8,38 +8,9 @@
 
 define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @fadd_fsub_v8f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7
-; CHECK-NEXT:    [[AB0:%.*]] = fadd float [[A0]], [[B0]]
-; CHECK-NEXT:    [[AB1:%.*]] = fsub float [[A1]], [[B1]]
-; CHECK-NEXT:    [[AB2:%.*]] = fsub float [[A2]], [[B2]]
-; CHECK-NEXT:    [[AB3:%.*]] = fadd float [[A3]], [[B3]]
-; CHECK-NEXT:    [[AB4:%.*]] = fadd float [[A4]], [[B4]]
-; CHECK-NEXT:    [[AB5:%.*]] = fsub float [[A5]], [[B5]]
-; CHECK-NEXT:    [[AB6:%.*]] = fsub float [[A6]], [[B6]]
-; CHECK-NEXT:    [[AB7:%.*]] = fadd float [[A7]], [[B7]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
+; CHECK-NEXT:    [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 15>
 ; CHECK-NEXT:    ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
@@ -78,40 +49,40 @@ define <8 x float> @fadd_fsub_v8f32(<8 x
 }
 
 define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
-; CHECK-LABEL: @fmul_fdiv_v8f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7
-; CHECK-NEXT:    [[AB0:%.*]] = fmul float [[A0]], [[B0]]
-; CHECK-NEXT:    [[AB1:%.*]] = fdiv float [[A1]], [[B1]]
-; CHECK-NEXT:    [[AB2:%.*]] = fdiv float [[A2]], [[B2]]
-; CHECK-NEXT:    [[AB3:%.*]] = fmul float [[A3]], [[B3]]
-; CHECK-NEXT:    [[AB4:%.*]] = fmul float [[A4]], [[B4]]
-; CHECK-NEXT:    [[AB5:%.*]] = fdiv float [[A5]], [[B5]]
-; CHECK-NEXT:    [[AB6:%.*]] = fdiv float [[A6]], [[B6]]
-; CHECK-NEXT:    [[AB7:%.*]] = fmul float [[A7]], [[B7]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
-; CHECK-NEXT:    ret <8 x float> [[R7]]
+; SSE-LABEL: @fmul_fdiv_v8f32(
+; SSE-NEXT:    [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
+; SSE-NEXT:    [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
+; SSE-NEXT:    [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 15>
+; SSE-NEXT:    ret <8 x float> [[R7]]
+;
+; SLM-LABEL: @fmul_fdiv_v8f32(
+; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; SLM-NEXT:    [[TMP4:%.*]] = fdiv <4 x float> [[TMP1]], [[TMP2]]
+; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT:    [[TMP6:%.*]] = shufflevector <8 x float> [[B]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
+; SLM-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; SLM-NEXT:    [[TMP9:%.*]] = fdiv <4 x float> [[TMP5]], [[TMP6]]
+; SLM-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; SLM-NEXT:    [[R3:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP3]], <8 x i32> <i32 4, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; SLM-NEXT:    [[R4:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 undef, i32 undef, i32 undef>
+; SLM-NEXT:    [[R6:%.*]] = shufflevector <8 x float> [[R4]], <8 x float> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 9, i32 10, i32 undef>
+; SLM-NEXT:    [[R7:%.*]] = shufflevector <8 x float> [[R6]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
+; SLM-NEXT:    ret <8 x float> [[R7]]
+;
+; AVX-LABEL: @fmul_fdiv_v8f32(
+; AVX-NEXT:    [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT:    [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
+; AVX-NEXT:    [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 15>
+; AVX-NEXT:    ret <8 x float> [[R7]]
+;
+; AVX512-LABEL: @fmul_fdiv_v8f32(
+; AVX512-NEXT:    [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
+; AVX512-NEXT:    [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
+; AVX512-NEXT:    [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 15>
+; AVX512-NEXT:    ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
   %a1 = extractelement <8 x float> %a, i32 1
@@ -149,60 +120,9 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x
 }
 
 define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
-; SSE-LABEL: @fmul_fdiv_v4f32_const(
-; SSE-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 2
-; SSE-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
-; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; SSE-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
-; SSE-NEXT:    [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
-; SSE-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; SSE-NEXT:    [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
-; SSE-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; SSE-NEXT:    [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
-; SSE-NEXT:    [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
-; SSE-NEXT:    [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
-; SSE-NEXT:    ret <4 x float> [[R3]]
-;
-; SLM-LABEL: @fmul_fdiv_v4f32_const(
-; SLM-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
-; SLM-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
-; SLM-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
-; SLM-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
-; SLM-NEXT:    [[AB0:%.*]] = fmul float [[A0]], 2.000000e+00
-; SLM-NEXT:    [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
-; SLM-NEXT:    [[R0:%.*]] = insertelement <4 x float> undef, float [[AB0]], i32 0
-; SLM-NEXT:    [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[A1]], i32 1
-; SLM-NEXT:    [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
-; SLM-NEXT:    [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
-; SLM-NEXT:    ret <4 x float> [[R3]]
-;
-; AVX-LABEL: @fmul_fdiv_v4f32_const(
-; AVX-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 2
-; AVX-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
-; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; AVX-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
-; AVX-NEXT:    [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
-; AVX-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; AVX-NEXT:    [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
-; AVX-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; AVX-NEXT:    [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
-; AVX-NEXT:    [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
-; AVX-NEXT:    [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
-; AVX-NEXT:    ret <4 x float> [[R3]]
-;
-; AVX512-LABEL: @fmul_fdiv_v4f32_const(
-; AVX512-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 2
-; AVX512-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
-; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; AVX512-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
-; AVX512-NEXT:    [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
-; AVX512-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; AVX512-NEXT:    [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
-; AVX512-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; AVX512-NEXT:    [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
-; AVX512-NEXT:    [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
-; AVX512-NEXT:    [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
-; AVX512-NEXT:    ret <4 x float> [[R3]]
+; CHECK-LABEL: @fmul_fdiv_v4f32_const(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %a0 = extractelement <4 x float> %a, i32 0
   %a1 = extractelement <4 x float> %a, i32 1

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-int.ll?rev=335364&r1=335363&r2=335364&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-int.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/alternate-int.ll Fri Jun 22 09:37:34 2018
@@ -8,31 +8,9 @@
 
 define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: @add_sub_v8i32(
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i32> [[A]], [[B]]
-; CHECK-NEXT:    [[AB4:%.*]] = sub i32 [[A4]], [[B4]]
-; CHECK-NEXT:    [[AB5:%.*]] = sub i32 [[A5]], [[B5]]
-; CHECK-NEXT:    [[AB6:%.*]] = sub i32 [[A6]], [[B6]]
-; CHECK-NEXT:    [[AB7:%.*]] = sub i32 [[A7]], [[B7]]
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP3]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP4]], i32 2
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP5]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
+; CHECK-NEXT:    [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    ret <8 x i32> [[R7]]
 ;
   %a0 = extractelement <8 x i32> %a, i32 0
@@ -71,72 +49,11 @@ define <8 x i32> @add_sub_v8i32(<8 x i32
 }
 
 define <4 x i32> @add_and_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: @add_and_v4i32(
-; SSE-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
-; SSE-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; SSE-NEXT:    [[B2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
-; SSE-NEXT:    [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
-; SSE-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A]], [[B]]
-; SSE-NEXT:    [[AB2:%.*]] = and i32 [[A2]], [[B2]]
-; SSE-NEXT:    [[AB3:%.*]] = and i32 [[A3]], [[B3]]
-; SSE-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; SSE-NEXT:    [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
-; SSE-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
-; SSE-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP3]], i32 1
-; SSE-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
-; SSE-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
-; SSE-NEXT:    ret <4 x i32> [[R3]]
-;
-; SLM-LABEL: @add_and_v4i32(
-; SLM-NEXT:    [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
-; SLM-NEXT:    [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; SLM-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
-; SLM-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; SLM-NEXT:    [[B0:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
-; SLM-NEXT:    [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1
-; SLM-NEXT:    [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2
-; SLM-NEXT:    [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
-; SLM-NEXT:    [[AB0:%.*]] = add i32 [[A0]], [[B0]]
-; SLM-NEXT:    [[AB1:%.*]] = add i32 [[A1]], [[B1]]
-; SLM-NEXT:    [[AB2:%.*]] = and i32 [[A2]], [[B2]]
-; SLM-NEXT:    [[AB3:%.*]] = and i32 [[A3]], [[B3]]
-; SLM-NEXT:    [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[AB0]], i32 0
-; SLM-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[AB1]], i32 1
-; SLM-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
-; SLM-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
-; SLM-NEXT:    ret <4 x i32> [[R3]]
-;
-; AVX-LABEL: @add_and_v4i32(
-; AVX-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
-; AVX-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; AVX-NEXT:    [[B2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
-; AVX-NEXT:    [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
-; AVX-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A]], [[B]]
-; AVX-NEXT:    [[AB2:%.*]] = and i32 [[A2]], [[B2]]
-; AVX-NEXT:    [[AB3:%.*]] = and i32 [[A3]], [[B3]]
-; AVX-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; AVX-NEXT:    [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
-; AVX-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
-; AVX-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP3]], i32 1
-; AVX-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
-; AVX-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
-; AVX-NEXT:    ret <4 x i32> [[R3]]
-;
-; AVX512-LABEL: @add_and_v4i32(
-; AVX512-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
-; AVX512-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; AVX512-NEXT:    [[B2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
-; AVX512-NEXT:    [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
-; AVX512-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A]], [[B]]
-; AVX512-NEXT:    [[AB2:%.*]] = and i32 [[A2]], [[B2]]
-; AVX512-NEXT:    [[AB3:%.*]] = and i32 [[A3]], [[B3]]
-; AVX512-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; AVX512-NEXT:    [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
-; AVX512-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
-; AVX512-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP3]], i32 1
-; AVX512-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
-; AVX512-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
-; AVX512-NEXT:    ret <4 x i32> [[R3]]
+; CHECK-LABEL: @add_and_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A]], [[B]]
+; CHECK-NEXT:    [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[R3]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
@@ -158,24 +75,42 @@ define <4 x i32> @add_and_v4i32(<4 x i32
 }
 
 define <4 x i32> @add_mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @add_mul_v4i32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
-; CHECK-NEXT:    [[AB0:%.*]] = mul i32 [[A0]], [[B0]]
-; CHECK-NEXT:    [[AB1:%.*]] = add i32 [[A1]], [[B1]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[A2]], [[B2]]
-; CHECK-NEXT:    [[AB3:%.*]] = mul i32 [[A3]], [[B3]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[AB0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[AB1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
-; CHECK-NEXT:    ret <4 x i32> [[R3]]
+; SSE-LABEL: @add_mul_v4i32(
+; SSE-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
+; SSE-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
+; SSE-NEXT:    [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+; SSE-NEXT:    ret <4 x i32> [[R3]]
+;
+; SLM-LABEL: @add_mul_v4i32(
+; SLM-NEXT:    [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
+; SLM-NEXT:    [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
+; SLM-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
+; SLM-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
+; SLM-NEXT:    [[B0:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
+; SLM-NEXT:    [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1
+; SLM-NEXT:    [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2
+; SLM-NEXT:    [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
+; SLM-NEXT:    [[AB0:%.*]] = mul i32 [[A0]], [[B0]]
+; SLM-NEXT:    [[AB1:%.*]] = add i32 [[A1]], [[B1]]
+; SLM-NEXT:    [[AB2:%.*]] = add i32 [[A2]], [[B2]]
+; SLM-NEXT:    [[AB3:%.*]] = mul i32 [[A3]], [[B3]]
+; SLM-NEXT:    [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[AB0]], i32 0
+; SLM-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[AB1]], i32 1
+; SLM-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
+; SLM-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
+; SLM-NEXT:    ret <4 x i32> [[R3]]
+;
+; AVX-LABEL: @add_mul_v4i32(
+; AVX-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
+; AVX-NEXT:    [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+; AVX-NEXT:    ret <4 x i32> [[R3]]
+;
+; AVX512-LABEL: @add_mul_v4i32(
+; AVX512-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
+; AVX512-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
+; AVX512-NEXT:    [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+; AVX512-NEXT:    ret <4 x i32> [[R3]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
@@ -202,160 +137,38 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i3
 ; SSE-NEXT:    [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
 ; SSE-NEXT:    [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
 ; SSE-NEXT:    [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
-; SSE-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
-; SSE-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; SSE-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
-; SSE-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
 ; SSE-NEXT:    [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
 ; SSE-NEXT:    [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
 ; SSE-NEXT:    [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
 ; SSE-NEXT:    [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
-; SSE-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
-; SSE-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
-; SSE-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
-; SSE-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
 ; SSE-NEXT:    [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
 ; SSE-NEXT:    [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
 ; SSE-NEXT:    [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
 ; SSE-NEXT:    [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
-; SSE-NEXT:    [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
-; SSE-NEXT:    [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
-; SSE-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
-; SSE-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
+; SSE-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[A]], [[B]]
 ; SSE-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
 ; SSE-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
 ; SSE-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
 ; SSE-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
-; SSE-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; SSE-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
-; SSE-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
-; SSE-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
+; SSE-NEXT:    [[R7:%.*]] = shufflevector <8 x i32> [[R3]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
 ; SSE-NEXT:    ret <8 x i32> [[R7]]
 ;
 ; SLM-LABEL: @ashr_shl_v8i32(
-; SLM-NEXT:    [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
-; SLM-NEXT:    [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
-; SLM-NEXT:    [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
-; SLM-NEXT:    [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
-; SLM-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
-; SLM-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; SLM-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
-; SLM-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; SLM-NEXT:    [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
-; SLM-NEXT:    [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
-; SLM-NEXT:    [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
-; SLM-NEXT:    [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
-; SLM-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
-; SLM-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
-; SLM-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
-; SLM-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; SLM-NEXT:    [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
-; SLM-NEXT:    [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
-; SLM-NEXT:    [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
-; SLM-NEXT:    [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
-; SLM-NEXT:    [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
-; SLM-NEXT:    [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
-; SLM-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
-; SLM-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
-; SLM-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
-; SLM-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
-; SLM-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
-; SLM-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
-; SLM-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; SLM-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
-; SLM-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
-; SLM-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
+; SLM-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
+; SLM-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
+; SLM-NEXT:    [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
 ; SLM-NEXT:    ret <8 x i32> [[R7]]
 ;
-; AVX1-LABEL: @ashr_shl_v8i32(
-; AVX1-NEXT:    [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
-; AVX1-NEXT:    [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
-; AVX1-NEXT:    [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
-; AVX1-NEXT:    [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
-; AVX1-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
-; AVX1-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; AVX1-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
-; AVX1-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; AVX1-NEXT:    [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
-; AVX1-NEXT:    [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
-; AVX1-NEXT:    [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
-; AVX1-NEXT:    [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
-; AVX1-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
-; AVX1-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
-; AVX1-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
-; AVX1-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; AVX1-NEXT:    [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
-; AVX1-NEXT:    [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
-; AVX1-NEXT:    [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
-; AVX1-NEXT:    [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
-; AVX1-NEXT:    [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
-; AVX1-NEXT:    [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
-; AVX1-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
-; AVX1-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
-; AVX1-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
-; AVX1-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
-; AVX1-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
-; AVX1-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
-; AVX1-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; AVX1-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
-; AVX1-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
-; AVX1-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
-; AVX1-NEXT:    ret <8 x i32> [[R7]]
-;
-; AVX2-LABEL: @ashr_shl_v8i32(
-; AVX2-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 4
-; AVX2-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; AVX2-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
-; AVX2-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; AVX2-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 4
-; AVX2-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
-; AVX2-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
-; AVX2-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; AVX2-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A]], [[B]]
-; AVX2-NEXT:    [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
-; AVX2-NEXT:    [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
-; AVX2-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
-; AVX2-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
-; AVX2-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
-; AVX2-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP2]], i32 0
-; AVX2-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
-; AVX2-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP3]], i32 1
-; AVX2-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
-; AVX2-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP4]], i32 2
-; AVX2-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
-; AVX2-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP5]], i32 3
-; AVX2-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; AVX2-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
-; AVX2-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
-; AVX2-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
-; AVX2-NEXT:    ret <8 x i32> [[R7]]
+; AVX-LABEL: @ashr_shl_v8i32(
+; AVX-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
+; AVX-NEXT:    [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; AVX-NEXT:    ret <8 x i32> [[R7]]
 ;
 ; AVX512-LABEL: @ashr_shl_v8i32(
-; AVX512-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 4
-; AVX512-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; AVX512-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
-; AVX512-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; AVX512-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 4
-; AVX512-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
-; AVX512-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
-; AVX512-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; AVX512-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A]], [[B]]
-; AVX512-NEXT:    [[AB4:%.*]] = shl i32 [[A4]], [[B4]]
-; AVX512-NEXT:    [[AB5:%.*]] = shl i32 [[A5]], [[B5]]
-; AVX512-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
-; AVX512-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
-; AVX512-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
-; AVX512-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP2]], i32 0
-; AVX512-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
-; AVX512-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP3]], i32 1
-; AVX512-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
-; AVX512-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP4]], i32 2
-; AVX512-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
-; AVX512-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP5]], i32 3
-; AVX512-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; AVX512-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
-; AVX512-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
-; AVX512-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
+; AVX512-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
+; AVX512-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
+; AVX512-NEXT:    [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
 ; AVX512-NEXT:    ret <8 x i32> [[R7]]
 ;
   %a0 = extractelement <8 x i32> %a, i32 0
@@ -394,30 +207,41 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i3
 }
 
 define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
-; CHECK-LABEL: @ashr_shl_v8i32_const(
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[AB4:%.*]] = shl i32 [[A4]], 3
-; CHECK-NEXT:    [[AB5:%.*]] = shl i32 [[A5]], 3
-; CHECK-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], 3
-; CHECK-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP4]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP5]], i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP6]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
-; CHECK-NEXT:    ret <8 x i32> [[R7]]
+; SSE-LABEL: @ashr_shl_v8i32_const(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+; SSE-NEXT:    [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    ret <8 x i32> [[R7]]
+;
+; SLM-LABEL: @ashr_shl_v8i32_const(
+; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+; SLM-NEXT:    [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT:    ret <8 x i32> [[R7]]
+;
+; AVX1-LABEL: @ashr_shl_v8i32_const(
+; AVX1-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX1-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; AVX1-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; AVX1-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT:    [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX1-NEXT:    ret <8 x i32> [[R7]]
+;
+; AVX2-LABEL: @ashr_shl_v8i32_const(
+; AVX2-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[A]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT:    [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; AVX2-NEXT:    ret <8 x i32> [[R7]]
+;
+; AVX512-LABEL: @ashr_shl_v8i32_const(
+; AVX512-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[A]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT:    [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; AVX512-NEXT:    ret <8 x i32> [[R7]]
 ;
   %a0 = extractelement <8 x i32> %a, i32 0
   %a1 = extractelement <8 x i32> %a, i32 1
@@ -485,101 +309,111 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8
 ; SLM-LABEL: @ashr_lshr_shl_v8i32(
 ; SLM-NEXT:    [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
 ; SLM-NEXT:    [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
-; SLM-NEXT:    [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
-; SLM-NEXT:    [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
-; SLM-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
-; SLM-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
 ; SLM-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
 ; SLM-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
 ; SLM-NEXT:    [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
 ; SLM-NEXT:    [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
-; SLM-NEXT:    [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
-; SLM-NEXT:    [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
-; SLM-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
-; SLM-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
 ; SLM-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
 ; SLM-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
 ; SLM-NEXT:    [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
 ; SLM-NEXT:    [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
-; SLM-NEXT:    [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
-; SLM-NEXT:    [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
-; SLM-NEXT:    [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
-; SLM-NEXT:    [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
+; SLM-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
 ; SLM-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
 ; SLM-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
 ; SLM-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
 ; SLM-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
-; SLM-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
-; SLM-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
-; SLM-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; SLM-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
+; SLM-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
+; SLM-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
+; SLM-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
+; SLM-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
+; SLM-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
+; SLM-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
+; SLM-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
+; SLM-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
 ; SLM-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
 ; SLM-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
 ; SLM-NEXT:    ret <8 x i32> [[R7]]
 ;
-; AVX-LABEL: @ashr_lshr_shl_v8i32(
-; AVX-NEXT:    [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
-; AVX-NEXT:    [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
-; AVX-NEXT:    [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
-; AVX-NEXT:    [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
-; AVX-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
-; AVX-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; AVX-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
-; AVX-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; AVX-NEXT:    [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
-; AVX-NEXT:    [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
-; AVX-NEXT:    [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
-; AVX-NEXT:    [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
-; AVX-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
-; AVX-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
-; AVX-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
-; AVX-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; AVX-NEXT:    [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
-; AVX-NEXT:    [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
-; AVX-NEXT:    [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
-; AVX-NEXT:    [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
-; AVX-NEXT:    [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
-; AVX-NEXT:    [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
-; AVX-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
-; AVX-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
-; AVX-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
-; AVX-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
-; AVX-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
-; AVX-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
-; AVX-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; AVX-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
-; AVX-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
-; AVX-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
-; AVX-NEXT:    ret <8 x i32> [[R7]]
+; AVX1-LABEL: @ashr_lshr_shl_v8i32(
+; AVX1-NEXT:    [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
+; AVX1-NEXT:    [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
+; AVX1-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
+; AVX1-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
+; AVX1-NEXT:    [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
+; AVX1-NEXT:    [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
+; AVX1-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
+; AVX1-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
+; AVX1-NEXT:    [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
+; AVX1-NEXT:    [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
+; AVX1-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
+; AVX1-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
+; AVX1-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
+; AVX1-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
+; AVX1-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
+; AVX1-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
+; AVX1-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
+; AVX1-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
+; AVX1-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
+; AVX1-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
+; AVX1-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
+; AVX1-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
+; AVX1-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
+; AVX1-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
+; AVX1-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
+; AVX1-NEXT:    ret <8 x i32> [[R7]]
+;
+; AVX2-LABEL: @ashr_lshr_shl_v8i32(
+; AVX2-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6
+; AVX2-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
+; AVX2-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6
+; AVX2-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
+; AVX2-NEXT:    [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
+; AVX2-NEXT:    [[TMP5:%.*]] = lshr <8 x i32> [[A]], [[B]]
+; AVX2-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
+; AVX2-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
+; AVX2-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
+; AVX2-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 0
+; AVX2-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
+; AVX2-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP7]], i32 1
+; AVX2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
+; AVX2-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP8]], i32 2
+; AVX2-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; AVX2-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP9]], i32 3
+; AVX2-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4
+; AVX2-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP10]], i32 4
+; AVX2-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5
+; AVX2-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP11]], i32 5
+; AVX2-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
+; AVX2-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
+; AVX2-NEXT:    ret <8 x i32> [[R7]]
 ;
 ; AVX512-LABEL: @ashr_lshr_shl_v8i32(
-; AVX512-NEXT:    [[A2:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 2
-; AVX512-NEXT:    [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
-; AVX512-NEXT:    [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
-; AVX512-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
-; AVX512-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
+; AVX512-NEXT:    [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6
 ; AVX512-NEXT:    [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; AVX512-NEXT:    [[B2:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 2
-; AVX512-NEXT:    [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
-; AVX512-NEXT:    [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
-; AVX512-NEXT:    [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
-; AVX512-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
+; AVX512-NEXT:    [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6
 ; AVX512-NEXT:    [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; AVX512-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A]], [[B]]
-; AVX512-NEXT:    [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
-; AVX512-NEXT:    [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
-; AVX512-NEXT:    [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
-; AVX512-NEXT:    [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
+; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
+; AVX512-NEXT:    [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
+; AVX512-NEXT:    [[TMP5:%.*]] = lshr <8 x i32> [[A]], [[B]]
 ; AVX512-NEXT:    [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
 ; AVX512-NEXT:    [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
-; AVX512-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
-; AVX512-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP2]], i32 0
-; AVX512-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
-; AVX512-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP3]], i32 1
-; AVX512-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
-; AVX512-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
-; AVX512-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
-; AVX512-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
+; AVX512-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
+; AVX512-NEXT:    [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 0
+; AVX512-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
+; AVX512-NEXT:    [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP7]], i32 1
+; AVX512-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
+; AVX512-NEXT:    [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP8]], i32 2
+; AVX512-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; AVX512-NEXT:    [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP9]], i32 3
+; AVX512-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4
+; AVX512-NEXT:    [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP10]], i32 4
+; AVX512-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5
+; AVX512-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP11]], i32 5
 ; AVX512-NEXT:    [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
 ; AVX512-NEXT:    [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
 ; AVX512-NEXT:    ret <8 x i32> [[R7]]

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll?rev=335364&r1=335363&r2=335364&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll Fri Jun 22 09:37:34 2018
@@ -33,16 +33,6 @@ for.body:
 
  ; CHECK-NOT: add nsw <{{[0-9]+}} x i32> 
  ; YAML:      Pass:            slp-vectorizer
- ; YAML-NEXT: Name:            InequableTypes
- ; YAML-NEXT: Function:        foo
- ; YAML-NEXT: Args:
- ; YAML-NEXT:   - String:          'Cannot SLP vectorize list: not all of the '
- ; YAML-NEXT:   - String:          'parts of scalar instructions are of the same type: '
- ; YAML-NEXT:   - Instruction1Opcode: add
- ; YAML-NEXT:   - String:          ' and '
- ; YAML-NEXT:   - Instruction2Opcode: phi
-
- ; YAML:      Pass:            slp-vectorizer
  ; YAML-NEXT: Name:            NotPossible
  ; YAML-NEXT: Function:        foo
  ; YAML-NEXT: Args:

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/resched.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/resched.ll?rev=335364&r1=335363&r2=335364&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/resched.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/resched.ll Fri Jun 22 09:37:34 2018
@@ -12,56 +12,70 @@ define fastcc void @_ZN12_GLOBAL__N_127P
 ; CHECK-NEXT:    [[SUB_I:%.*]] = add nsw i32 undef, -1
 ; CHECK-NEXT:    [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_1_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 1
-; CHECK-NEXT:    [[SHR_1_I_I:%.*]] = lshr i32 [[CONV31_I]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_2_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 2
-; CHECK-NEXT:    [[SHR_2_I_I:%.*]] = lshr i32 [[CONV31_I]], 3
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_3_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 3
-; CHECK-NEXT:    [[SHR_3_I_I:%.*]] = lshr i32 [[CONV31_I]], 4
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_4_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 4
-; CHECK-NEXT:    [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_5_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 5
-; CHECK-NEXT:    [[SHR_5_I_I:%.*]] = lshr i32 [[CONV31_I]], 6
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_6_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6
-; CHECK-NEXT:    [[SHR_6_I_I:%.*]] = lshr i32 [[CONV31_I]], 7
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_7_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7
-; CHECK-NEXT:    [[SHR_7_I_I:%.*]] = lshr i32 [[CONV31_I]], 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[CONV31_I]], i32 4
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[CONV31_I]], i32 5
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[CONV31_I]], i32 6
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[CONV31_I]], i32 7
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_8_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 8
-; CHECK-NEXT:    [[SHR_8_I_I:%.*]] = lshr i32 [[CONV31_I]], 9
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_9_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9
-; CHECK-NEXT:    [[SHR_9_I_I:%.*]] = lshr i32 [[CONV31_I]], 10
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_10_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10
-; CHECK-NEXT:    [[SHR_10_I_I:%.*]] = lshr i32 [[CONV31_I]], 11
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_11_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11
-; CHECK-NEXT:    [[SHR_11_I_I:%.*]] = lshr i32 [[CONV31_I]], 12
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], <i32 9, i32 10, i32 11, i32 12>
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12
 ; CHECK-NEXT:    [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13
 ; CHECK-NEXT:    [[SHR_13_I_I:%.*]] = lshr i32 [[CONV31_I]], 14
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14
 ; CHECK-NEXT:    [[SHR_14_I_I:%.*]] = lshr i32 [[CONV31_I]], 15
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[SHR_I_I]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SHR_1_I_I]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[SHR_2_I_I]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[SHR_3_I_I]], i32 4
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[SHR_4_I_I]], i32 5
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[SHR_5_I_I]], i32 6
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SHR_6_I_I]], i32 7
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_7_I_I]], i32 8
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_8_I_I]], i32 9
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SHR_9_I_I]], i32 10
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[SHR_10_I_I]], i32 11
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[SHR_11_I_I]], i32 12
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[SHR_12_I_I]], i32 13
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[SHR_13_I_I]], i32 14
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[SHR_14_I_I]], i32 15
-; CHECK-NEXT:    [[TMP17:%.*]] = trunc <16 x i32> [[TMP16]] to <16 x i8>
-; CHECK-NEXT:    [[TMP18:%.*]] = and <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[TMP17]]
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP16]], i32 1
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <8 x i32> [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP18]], i32 2
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP9]], i32 2
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP20]], i32 3
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP9]], i32 3
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP22]], i32 4
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP9]], i32 4
+; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP24]], i32 5
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <8 x i32> [[TMP9]], i32 5
+; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <16 x i32> [[TMP25]], i32 [[TMP26]], i32 6
+; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <8 x i32> [[TMP9]], i32 6
+; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <16 x i32> [[TMP27]], i32 [[TMP28]], i32 7
+; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <8 x i32> [[TMP9]], i32 7
+; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <16 x i32> [[TMP29]], i32 [[TMP30]], i32 8
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0
+; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <16 x i32> [[TMP31]], i32 [[TMP32]], i32 9
+; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1
+; CHECK-NEXT:    [[TMP35:%.*]] = insertelement <16 x i32> [[TMP33]], i32 [[TMP34]], i32 10
+; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2
+; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <16 x i32> [[TMP35]], i32 [[TMP36]], i32 11
+; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3
+; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <16 x i32> [[TMP37]], i32 [[TMP38]], i32 12
+; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <16 x i32> [[TMP39]], i32 [[SHR_12_I_I]], i32 13
+; CHECK-NEXT:    [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14
+; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15
+; CHECK-NEXT:    [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8>
+; CHECK-NEXT:    [[TMP44:%.*]] = and <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[TMP43]]
 ; CHECK-NEXT:    [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP18]], <16 x i8>* [[TMP19]], align 1
+; CHECK-NEXT:    [[TMP45:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
+; CHECK-NEXT:    store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1
 ; CHECK-NEXT:    unreachable
 ; CHECK:       if.end50.i:
 ; CHECK-NEXT:    ret void




More information about the llvm-commits mailing list