[llvm] r310724 - [x86] Enable some support for lowerVectorShuffleWithUndefHalf with AVX-512
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 11 09:20:05 PDT 2017
Author: ctopper
Date: Fri Aug 11 09:20:05 2017
New Revision: 310724
URL: http://llvm.org/viewvc/llvm-project?rev=310724&view=rev
Log:
[x86] Enable some support for lowerVectorShuffleWithUndefHalf with AVX-512
Summary:
This teaches 512-bit shuffles to detect unused halfs in order to reduce shuffle size.
We may need to refine the 512-bit exit point. I couldn't remember if we had good cross lane shuffles for 8/16 bit with AVX-512 or not.
I believe this is step towards being able to handle D36454 without a special case.
>From here we need to improve our ability to combine extract_subvector with insert_subvector and other extract_subvectors. And we need to support narrowing binary operations where we don't demand all elements. This may be improvements to DAGCombiner::narrowExtractedVectorBinOp(by recognizing an insert_subvector in addition to concat) or we may need a target specific combiner.
Reviewers: RKSimon, zvi, delena, jbhateja
Reviewed By: RKSimon, jbhateja
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D36601
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/madd.ll
llvm/trunk/test/CodeGen/X86/sad.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=310724&r1=310723&r2=310724&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 11 09:20:05 2017
@@ -12256,7 +12256,7 @@ static SDValue lowerVectorShuffleByMergi
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}
-/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
+/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
/// This allows for fast cases such as subvector extraction/insertion
/// or shuffling smaller vector types which can lower more efficiently.
static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
@@ -12264,7 +12264,8 @@ static SDValue lowerVectorShuffleWithUnd
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(VT.is256BitVector() && "Expected 256-bit vector");
+ assert((VT.is256BitVector() || VT.is512BitVector()) &&
+ "Expected 256-bit or 512-bit vector");
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfNumElts = NumElts / 2;
@@ -12360,6 +12361,10 @@ static SDValue lowerVectorShuffleWithUnd
}
}
+ // AVX512 - XXXXuuuu - always extract lowers.
+ if (VT.is512BitVector() && !(UndefUpper && NumUpperHalves == 0))
+ return SDValue();
+
auto GetHalfVector = [&](int HalfIdx) {
if (HalfIdx < 0)
return DAG.getUNDEF(HalfVT);
@@ -13703,6 +13708,11 @@ static SDValue lower512BitVectorShuffle(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
+ // Handle special cases where the lower or upper half is UNDEF.
+ if (SDValue V =
+ lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return V;
+
// Check for being able to broadcast a single element.
if (SDValue Broadcast =
lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
Modified: llvm/trunk/test/CodeGen/X86/madd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/madd.ll?rev=310724&r1=310723&r2=310724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/madd.ll (original)
+++ llvm/trunk/test/CodeGen/X86/madd.ll Fri Aug 11 09:20:05 2017
@@ -323,13 +323,13 @@ define i32 @_Z9test_charPcS_i(i8* nocapt
; AVX512-NEXT: cmpq %rcx, %rax
; AVX512-NEXT: jne .LBB2_1
; AVX512-NEXT: # BB#2: # %middle.block
-; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
+; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/sad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sad.ll?rev=310724&r1=310723&r2=310724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sad.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sad.ll Fri Aug 11 09:20:05 2017
@@ -72,13 +72,13 @@ define i32 @sad_16i8() nounwind {
; AVX512F-NEXT: addq $4, %rax
; AVX512F-NEXT: jne .LBB0_1
; AVX512F-NEXT: # BB#2: # %middle.block
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
@@ -98,13 +98,13 @@ define i32 @sad_16i8() nounwind {
; AVX512BW-NEXT: addq $4, %rax
; AVX512BW-NEXT: jne .LBB0_1
; AVX512BW-NEXT: # BB#2: # %middle.block
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
@@ -321,13 +321,13 @@ define i32 @sad_32i8() nounwind {
; AVX512F-NEXT: jne .LBB1_1
; AVX512F-NEXT: # BB#2: # %middle.block
; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
@@ -349,13 +349,13 @@ define i32 @sad_32i8() nounwind {
; AVX512BW-NEXT: jne .LBB1_1
; AVX512BW-NEXT: # BB#2: # %middle.block
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
@@ -794,13 +794,13 @@ define i32 @sad_avx64i8() nounwind {
; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpaddd %zmm3, %zmm1, %zmm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
@@ -823,13 +823,13 @@ define i32 @sad_avx64i8() nounwind {
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=310724&r1=310723&r2=310724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Fri Aug 11 09:20:05 2017
@@ -262,19 +262,10 @@ define <16 x i32> @shuffle_v16i32_load_0
}
define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
-; AVX512F-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: movw $8, %ax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: movw $8, %ax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
-; AVX512BW-NEXT: retq
+; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
+; ALL: # BB#0:
+; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i32> %c
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll?rev=310724&r1=310723&r2=310724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll Fri Aug 11 09:20:05 2017
@@ -101,7 +101,7 @@ define <32 x i16> @shuffle_v16i32_0_32_1
;
; SKX-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
; SKX: ## BB#0:
-; SKX-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
+; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; SKX-NEXT: retq
%c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i16> %c
@@ -115,7 +115,7 @@ define <32 x i16> @shuffle_v16i32_4_36_5
;
; SKX-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
; SKX: ## BB#0:
-; SKX-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
+; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
; SKX-NEXT: retq
%c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i16> %c
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll?rev=310724&r1=310723&r2=310724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll Fri Aug 11 09:20:05 2017
@@ -5,25 +5,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VBMI
define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a) {
-; AVX512F-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512VBMI-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
-; AVX512VBMI: # BB#0:
-; AVX512VBMI-NEXT: vpsrld $16, %zmm0, %zmm0
-; AVX512VBMI-NEXT: retq
+; ALL-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
+; ALL: # BB#0:
+; ALL-NEXT: vpsrld $16, %xmm0, %xmm0
+; ALL-NEXT: retq
%b = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <64 x i8> %b
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=310724&r1=310723&r2=310724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Fri Aug 11 09:20:05 2017
@@ -2241,12 +2241,12 @@ define <8 x double> @shuffle_v8f64_2301u
define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
; AVX512F-LABEL: shuffle_v8f64_2301uuuu:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
+; AVX512F-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_2301uuuu:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
+; AVX512F-32-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
; AVX512F-32-NEXT: retl
%1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x double> %1
More information about the llvm-commits
mailing list