[llvm] r343853 - [X86][AVX] getFauxShuffleMask - add support for INSERT_SUBVECTOR subvector shuffles
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 5 07:41:00 PDT 2018
Author: rksimon
Date: Fri Oct 5 07:41:00 2018
New Revision: 343853
URL: http://llvm.org/viewvc/llvm-project?rev=343853&view=rev
Log:
[X86][AVX] getFauxShuffleMask - add support for INSERT_SUBVECTOR subvector shuffles
Decode subvector shuffles from INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1))
This was found necessary while investigating PR39161
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
llvm/trunk/test/CodeGen/X86/oddshuffles.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=343853&r1=343852&r2=343853&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Oct 5 07:41:00 2018
@@ -6380,6 +6380,42 @@ static bool getFauxShuffleMask(SDValue N
Ops.push_back(Op);
return true;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1)) where
+ // SRC0/SRC1 are both of the same valuetype VT.
+ // TODO - add peekThroughOneUseBitcasts support.
+ SDValue Src = N.getOperand(0);
+ SDValue Sub = N.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ if (!isa<ConstantSDNode>(N.getOperand(2)) ||
+ !N->isOnlyUserOf(Sub.getNode()))
+ return false;
+ SmallVector<int, 64> SubMask;
+ SmallVector<SDValue, 2> SubInputs;
+ if (!resolveTargetShuffleInputs(Sub, SubInputs, SubMask, DAG))
+ return false;
+ Ops.push_back(Src);
+ for (SDValue &SubInput : SubInputs) {
+ if (SubInput.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ SubInput.getOperand(0).getValueType() != VT ||
+ !isa<ConstantSDNode>(SubInput.getOperand(1)))
+ return false;
+ Ops.push_back(SubInput.getOperand(0));
+ }
+ int InsertIdx = N.getConstantOperandVal(2);
+ for (int i = 0; i != (int)NumElts; ++i)
+ Mask.push_back(i);
+ for (int i = 0; i != (int)NumSubElts; ++i) {
+ int M = SubMask[i];
+ if (0 <= M) {
+ int InputIdx = M / NumSubElts;
+ int ExtractIdx = SubInputs[InputIdx].getConstantOperandVal(1);
+ Mask[i + InsertIdx] = (NumElts * (1 + InputIdx)) + ExtractIdx + M;
+ }
+ }
+ return true;
+ }
case ISD::SCALAR_TO_VECTOR: {
// Match against a scalar_to_vector of an extract from a vector,
// for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=343853&r1=343852&r2=343853&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Fri Oct 5 07:41:00 2018
@@ -3138,8 +3138,7 @@ define <8 x float> @test_masked_16xfloat
; CHECK-LABEL: test_masked_16xfloat_to_8xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
-; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,4,10,11,6,1,4,4]
+; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,4,8,9,6,1,4,4]
; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1
@@ -3154,9 +3153,8 @@ define <8 x float> @test_masked_16xfloat
define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mask2(<16 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_to_8xfloat_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT: vmovddup {{.*#+}} xmm3 = xmm2[0,0]
-; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,4,10,11,6,1,4,4]
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,4,8,9,6,1,4,4]
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm2 {%k1} {z}
@@ -3459,10 +3457,9 @@ define <8 x float> @test_masked_16xfloat
; CHECK-LABEL: test_masked_16xfloat_to_8xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm2
-; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm2[1,0,0,3]
-; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2
-; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [8,5,2,3,2,9,10,1]
-; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4
+; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
+; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [9,5,2,3,2,8,8,1]
+; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1}
@@ -3477,10 +3474,9 @@ define <8 x float> @test_masked_16xfloat
define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2(<16 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[1,0,0,3]
-; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [8,5,2,3,2,9,10,1]
+; CHECK-NEXT: vmovaps (%rdi), %zmm2
+; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
+; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [9,5,2,3,2,8,8,1]
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm1 {%k1} {z}
Modified: llvm/trunk/test/CodeGen/X86/oddshuffles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/oddshuffles.ll?rev=343853&r1=343852&r2=343853&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/oddshuffles.ll (original)
+++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll Fri Oct 5 07:41:00 2018
@@ -1648,8 +1648,8 @@ define void @interleave_24i32_in(<24 x i
; AVX2-FAST-NEXT: vmovups (%rsi), %ymm0
; AVX2-FAST-NEXT: vmovups (%rdx), %ymm1
; AVX2-FAST-NEXT: vmovups (%rcx), %ymm2
-; AVX2-FAST-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[1,0,2,2]
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,1,0,1]
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = [1,0,2,2,1,0,2,2]
+; AVX2-FAST-NEXT: vpermps %ymm1, %ymm3, %ymm3
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[0,0,2,1]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7]
; AVX2-FAST-NEXT: vbroadcastsd %xmm2, %ymm4
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll?rev=343853&r1=343852&r2=343853&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll Fri Oct 5 07:41:00 2018
@@ -32,11 +32,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <32 x i8> %shuffle
}
@@ -50,11 +68,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -68,11 +104,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -86,11 +140,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -104,11 +176,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -122,11 +212,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -140,11 +248,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -158,11 +284,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -176,11 +320,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -194,11 +356,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -212,11 +392,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -230,11 +428,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -248,11 +464,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -266,11 +500,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -286,11 +538,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -1118,11 +1388,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <32 x i8> %shuffle
}
@@ -1134,11 +1422,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -1150,11 +1456,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -1166,11 +1490,29 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -1182,11 +1524,29 @@ define <32 x i8> @shuffle_v32i8_00_14_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@@ -1200,13 +1560,35 @@ define <32 x i8> @shuffle_v32i8_15_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: movl $15, %eax
-; AVX2OR512VL-NEXT: vmovd %eax, %xmm1
-; AVX2OR512VL-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl $15, %eax
+; AVX2-NEXT: vmovd %eax, %xmm1
+; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: movl $15, %eax
+; AVX512VLBW-NEXT: vmovd %eax, %xmm1
+; AVX512VLBW-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-SLOW: # %bb.0:
+; AVX512VLVBMI-SLOW-NEXT: movl $15, %eax
+; AVX512VLVBMI-SLOW-NEXT: vmovd %eax, %xmm1
+; AVX512VLVBMI-SLOW-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLVBMI-SLOW-NEXT: retq
+;
+; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX512VLVBMI-FAST: # %bb.0:
+; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=343853&r1=343852&r2=343853&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Fri Oct 5 07:41:00 2018
@@ -937,19 +937,11 @@ define <4 x i64> @shuffle_v4i64_0412(<4
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
;
-; AVX512VL-SLOW-LABEL: shuffle_v4i64_0412:
-; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
-; AVX512VL-SLOW-NEXT: retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v4i64_0412:
-; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2]
-; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
-; AVX512VL-FAST-NEXT: retq
+; AVX512VL-LABEL: shuffle_v4i64_0412:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,4,1,2]
+; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
ret <4 x i64> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=343853&r1=343852&r2=343853&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Fri Oct 5 07:41:00 2018
@@ -28,11 +28,29 @@ define <8 x float> @shuffle_v8f32_000000
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8f32_00000010:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8f32_00000010:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8f32_00000010:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8f32_00000010:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8f32_00000010:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <8 x float> %shuffle
}
@@ -45,11 +63,29 @@ define <8 x float> @shuffle_v8f32_000002
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8f32_00000200:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8f32_00000200:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8f32_00000200:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8f32_00000200:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8f32_00000200:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <8 x float> %shuffle
}
@@ -62,11 +98,29 @@ define <8 x float> @shuffle_v8f32_000030
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8f32_00003000:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8f32_00003000:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8f32_00003000:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8f32_00003000:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8f32_00003000:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
ret <8 x float> %shuffle
}
@@ -175,11 +229,29 @@ define <8 x float> @shuffle_v8f32_000011
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8f32_00001111:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8f32_00001111:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8f32_00001111:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8f32_00001111:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8f32_00001111:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
ret <8 x float> %shuffle
}
@@ -292,20 +364,29 @@ define <8 x float> @shuffle_v8f32_08991a
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v8f32_08991abb:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
-; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8f32_08991abb:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
+; AVX2-SLOW-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8f32_08991abb:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = <0,1,1,3,1,3,u,u>
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
+; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8f32_08991abb:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; AVX512VL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
-; AVX512VL-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
+; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,1,1,9,2,3,3]
+; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x float> %shuffle
@@ -344,26 +425,26 @@ define <8 x float> @shuffle_v8f32_09ab1d
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v8f32_09ab1def:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8f32_09ab1def:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-SLOW-NEXT: retq
;
-; AVX512VL-SLOW-LABEL: shuffle_v8f32_09ab1def:
-; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
-; AVX512VL-SLOW-NEXT: retq
+; AVX2-FAST-LABEL: shuffle_v8f32_09ab1def:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = <0,1,1,3,1,3,u,u>
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-FAST-NEXT: retq
;
-; AVX512VL-FAST-LABEL: shuffle_v8f32_09ab1def:
-; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm0 = [8,1,2,3,10,5,6,7]
-; AVX512VL-FAST-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
-; AVX512VL-FAST-NEXT: retq
+; AVX512VL-LABEL: shuffle_v8f32_09ab1def:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,9,5,6,7]
+; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x float> %shuffle
}
@@ -740,11 +821,29 @@ define <8 x float> @shuffle_v8f32_321032
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8f32_32103210:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8f32_32103210:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8f32_32103210:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8f32_32103210:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8f32_32103210:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
ret <8 x float> %shuffle
}
@@ -1097,11 +1196,29 @@ define <8 x i32> @shuffle_v8i32_00000010
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00000010:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i32_00000010:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i32_00000010:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i32_00000010:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i32_00000010:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <8 x i32> %shuffle
}
@@ -1114,11 +1231,29 @@ define <8 x i32> @shuffle_v8i32_00000200
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00000200:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i32_00000200:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i32_00000200:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i32_00000200:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i32_00000200:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <8 x i32> %shuffle
}
@@ -1131,11 +1266,29 @@ define <8 x i32> @shuffle_v8i32_00003000
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00003000:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i32_00003000:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i32_00003000:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i32_00003000:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i32_00003000:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
ret <8 x i32> %shuffle
}
@@ -1249,11 +1402,29 @@ define <8 x i32> @shuffle_v8i32_00001111
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00001111:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i32_00001111:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i32_00001111:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i32_00001111:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i32_00001111:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %shuffle
}
@@ -1897,11 +2068,29 @@ define <8 x i32> @shuffle_v8i32_32103210
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8i32_32103210:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i32_32103210:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i32_32103210:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
+; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i32_32103210:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i32_32103210:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
+; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
ret <8 x i32> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll?rev=343853&r1=343852&r2=343853&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll Fri Oct 5 07:41:00 2018
@@ -541,35 +541,33 @@ define <8 x float> @expand14(<4 x float>
define <8 x float> @expand15(<4 x float> %a) {
; SKX64-LABEL: expand15:
; SKX64: # %bb.0:
-; SKX64-NEXT: vmovaps {{.*#+}} xmm1 = [0,2,0,0]
-; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
-; SKX64-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
+; SKX64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SKX64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,8,3,10,5,6,7]
+; SKX64-NEXT: vbroadcastss {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
+; SKX64-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; SKX64-NEXT: retq
;
; KNL64-LABEL: expand15:
; KNL64: # %bb.0:
-; KNL64-NEXT: vmovaps {{.*#+}} xmm1 = [0,2,0,0]
-; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
+; KNL64-NEXT: vbroadcastss {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT: retq
;
; SKX32-LABEL: expand15:
; SKX32: # %bb.0:
-; SKX32-NEXT: vmovaps {{.*#+}} xmm1 = [0,2,0,0]
-; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
-; SKX32-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
+; SKX32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SKX32-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,8,3,10,5,6,7]
+; SKX32-NEXT: vbroadcastss {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
+; SKX32-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; SKX32-NEXT: retl
;
; KNL32-LABEL: expand15:
; KNL32: # %bb.0:
-; KNL32-NEXT: vmovaps {{.*#+}} xmm1 = [0,2,0,0]
-; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
+; KNL32-NEXT: vbroadcastss {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT: retl
%addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=343853&r1=343852&r2=343853&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Fri Oct 5 07:41:00 2018
@@ -540,9 +540,8 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,2,3]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,1,10,3,0,1,2,3]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,2,10,3,3,2,2,3]
; AVX512VL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vpslld $31, %ymm2, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
@@ -555,9 +554,8 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
-; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,2,3]
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8,1,10,3,0,1,2,3]
+; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8,2,10,3,3,2,2,3]
; VL_BW_DQ-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
@@ -677,9 +675,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[8,9,10,11],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
@@ -690,9 +686,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
-; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
-; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
+; VL_BW_DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[8,9,10,11],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
More information about the llvm-commits
mailing list