[llvm] r258081 - [X86][AVX2] Broadcast subvectors
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 18 12:59:05 PST 2016
Author: rksimon
Date: Mon Jan 18 14:59:04 2016
New Revision: 258081
URL: http://llvm.org/viewvc/llvm-project?rev=258081&view=rev
Log:
[X86][AVX2] Broadcast subvectors
AVX2 can only broadcast from the zero'th element of a vector, but if the broadcastable element is the zero'th element of a 128-bit subvector its advantageous to extract the subvector, broadcast from that and avoid the loading of shuffle mask data that would be needed for VPERMPS/VPERMD. The only exception being when the source type is 4f64 or 4i64 which can directly use the immediate shuffle VPERMPD/VPERMQ directly.
Differential Revision: http://reviews.llvm.org/D16050
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=258081&r1=258080&r2=258081&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 18 14:59:04 2016
@@ -8291,10 +8291,28 @@ static SDValue lowerVectorShuffleAsBroad
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
- } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
- // We can't broadcast from a vector register without AVX2, and we can only
- // broadcast from the zero-element of a vector register.
+ } else if (!Subtarget->hasAVX2()) {
+ // We can't broadcast from a vector register without AVX2.
return SDValue();
+ } else if (BroadcastIdx != 0) {
+ // We can only broadcast from the zero-element of a vector register,
+ // but it can be advantageous to broadcast from the zero-element of a
+ // subvector.
+ if (!VT.is256BitVector() && !VT.is512BitVector())
+ return SDValue();
+
+ // VPERMQ/VPERMPD can perform the cross-lane shuffle directly.
+ if (VT == MVT::v4f64 || VT == MVT::v4i64)
+ return SDValue();
+
+ // Only broadcast the zero-element of a 128-bit subvector.
+ unsigned EltSize = VT.getScalarSizeInBits();
+ if (((BroadcastIdx * EltSize) % 128) != 0)
+ return SDValue();
+
+ MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V,
+ DAG.getIntPtrConstant(BroadcastIdx, DL));
}
V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V);
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=258081&r1=258080&r2=258081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Mon Jan 18 14:59:04 2016
@@ -2904,8 +2904,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_
;
; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
-; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
@@ -3293,8 +3293,7 @@ define <16 x i16> @shuffle_v16i16_8_8_8_
; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i16> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll?rev=258081&r1=258080&r2=258081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll Mon Jan 18 14:59:04 2016
@@ -2006,8 +2006,7 @@ define <32 x i8> @shuffle_v32i8_16_16_16
; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <32 x i8> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=258081&r1=258080&r2=258081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Mon Jan 18 14:59:04 2016
@@ -851,8 +851,8 @@ define <8 x float> @shuffle_v8f32_444444
;
; AVX2-LABEL: shuffle_v8f32_44444444:
; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
-; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x float> %shuffle
@@ -2015,8 +2015,8 @@ define <8 x i32> @shuffle_v8i32_44444444
;
; AVX2-LABEL: shuffle_v8i32_44444444:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
-; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x i32> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=258081&r1=258080&r2=258081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Mon Jan 18 14:59:04 2016
@@ -4,6 +4,25 @@
target triple = "x86_64-unknown-unknown"
+define <16 x float> @shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x float> %a, <16 x float> %b) {
+; ALL-LABEL: shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x float> %shuffle
+}
+
+define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<16 x float> %a, <16 x float> %b) {
+; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
+; ALL: # BB#0:
+; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x float> %shuffle
+}
+
define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL: # BB#0:
@@ -70,6 +89,25 @@ define <16 x float> @shuffle_v16f32_zz_1
ret <16 x float> %shuffle
}
+define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) {
+; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; ALL: # BB#0:
+; ALL-NEXT: vpbroadcastd %xmm0, %zmm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) {
+; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
+; ALL: # BB#0:
+; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; ALL-NEXT: vpbroadcastd %xmm0, %zmm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+ ret <16 x i32> %shuffle
+}
+
define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll?rev=258081&r1=258080&r2=258081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll Mon Jan 18 14:59:04 2016
@@ -3,6 +3,25 @@
target triple = "x86_64-unknown-unknown"
+define <32 x i16> @shuffle_v32i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i16> %a) {
+; ALL-LABEL: shuffle_v32i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; ALL: # BB#0:
+; ALL-NEXT: vpbroadcastw %xmm0, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> zeroinitializer
+ ret <32 x i16> %c
+}
+
+define <32 x i16> @shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<32 x i16> %a) {
+; ALL-LABEL: shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
+; ALL: # BB#0:
+; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; ALL-NEXT: vpbroadcastw %xmm0, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <32 x i16> %c
+}
+
define <32 x i16> @shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f(<32 x i16> %a) {
; ALL-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
; ALL: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=258081&r1=258080&r2=258081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Mon Jan 18 14:59:04 2016
@@ -18,6 +18,38 @@ define <8 x double> @shuffle_v8f64_00000
ret <8 x double> %shuffle
}
+define <8 x double> @shuffle_v8f64_22222222(<8 x double> %a, <8 x double> %b) {
+; AVX512F-LABEL: shuffle_v8f64_22222222:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextractf32x4 $1, %zmm0, %xmm0
+; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_22222222:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vextractf32x4 $1, %zmm0, %xmm0
+; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
+; AVX512F-32-NEXT: retl
+ %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) {
+; AVX512F-LABEL: shuffle_v8f64_44444444:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm0
+; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_44444444:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm0
+; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
+; AVX512F-32-NEXT: retl
+ %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+ ret <8 x double> %shuffle
+}
+
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000010:
; AVX512F: # BB#0:
@@ -994,6 +1026,38 @@ define <8 x i64> @shuffle_v8i64_00000000
ret <8 x i64> %shuffle
}
+define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) {
+; AVX512F-LABEL: shuffle_v8i64_44444444:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_44444444:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-32-NEXT: retl
+ %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+ ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_66666666(<8 x i64> %a, <8 x i64> %b) {
+; AVX512F-LABEL: shuffle_v8i64_66666666:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_66666666:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-32-NEXT: retl
+ %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+ ret <8 x i64> %shuffle
+}
+
define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00000010:
@@ -2102,7 +2166,7 @@ define <8 x double> @test_vshuff64x2_512
; AVX512F-32-LABEL: test_vshuff64x2_512_maskz:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
-; AVX512F-32-NEXT: vpsllvq .LCPI122_0, %zmm2, %zmm2
+; AVX512F-32-NEXT: vpsllvq .LCPI126_0, %zmm2, %zmm2
; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
; AVX512F-32-NEXT: retl
@@ -2123,7 +2187,7 @@ define <8 x i64> @test_vshufi64x2_512_ma
; AVX512F-32-LABEL: test_vshufi64x2_512_mask:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
-; AVX512F-32-NEXT: vpsllvq .LCPI123_0, %zmm2, %zmm2
+; AVX512F-32-NEXT: vpsllvq .LCPI127_0, %zmm2, %zmm2
; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
; AVX512F-32-NEXT: retl
@@ -2160,7 +2224,7 @@ define <8 x double> @test_vshuff64x2_512
; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
-; AVX512F-32-NEXT: vpsllvq .LCPI125_0, %zmm1, %zmm1
+; AVX512F-32-NEXT: vpsllvq .LCPI129_0, %zmm1, %zmm1
; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
@@ -2183,7 +2247,7 @@ define <8 x double> @test_vshuff64x2_512
; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
-; AVX512F-32-NEXT: vpsllvq .LCPI126_0, %zmm1, %zmm1
+; AVX512F-32-NEXT: vpsllvq .LCPI130_0, %zmm1, %zmm1
; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=258081&r1=258080&r2=258081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Mon Jan 18 14:59:04 2016
@@ -165,8 +165,8 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
-; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm1
+; AVX512F-NEXT: vpbroadcastq %xmm1, %zmm1
; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm0 {%k1} {z}
@@ -177,8 +177,8 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
; VL_BW_DQ: # BB#0:
; VL_BW_DQ-NEXT: kmovb %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1
-; VL_BW_DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; VL_BW_DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm0
+; VL_BW_DQ-NEXT: vpbroadcastq %xmm0, %zmm0
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
More information about the llvm-commits
mailing list