[llvm] r290870 - [AVX-512] Teach shuffle lowering to use vinsert instructions for shuffles corresponding to 256-bit subvector inserts.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 2 23:00:41 PST 2017
Author: ctopper
Date: Tue Jan 3 01:00:40 2017
New Revision: 290870
URL: http://llvm.org/viewvc/llvm-project?rev=290870&view=rev
Log:
[AVX-512] Teach shuffle lowering to use vinsert instructions for shuffles corresponding to 256-bit subvector inserts.
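The pattern being recognized: after widening a 512-bit shuffle mask twice (8 x 64-bit elements -> 4 x 128-bit lanes -> 2 x 256-bit halves), a widened mask of {0, 0} or {0, 2} selects the low 256-bit half of V1 plus a low 256-bit half on top, which is exactly a single 256-bit subvector insert. Below is a minimal standalone sketch of that recognition step. It is not LLVM code: widenMask() is a hypothetical stand-in for canWidenShuffleElements(), and it ignores the undef/zeroable element handling the real function has.

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <optional>
#include <vector>

// Collapse each pair of adjacent mask elements into one element of twice
// the width. Fails (nullopt) if a pair does not address an aligned,
// contiguous pair of source elements. (Sketch only: no undef handling.)
static std::optional<std::vector<int>>
widenMask(const std::vector<int> &Mask) {
  assert(Mask.size() % 2 == 0 && "mask must have even size");
  std::vector<int> Widened;
  for (std::size_t i = 0; i < Mask.size(); i += 2) {
    int Lo = Mask[i], Hi = Mask[i + 1];
    if (Lo % 2 != 0 || Hi != Lo + 1)
      return std::nullopt;
    Widened.push_back(Lo / 2);
  }
  return Widened;
}

int main() {
  // An 8 x i64 shuffle taking the low half of V1 then the low half of V2.
  // Elements 0-7 index V1, elements 8-15 index V2.
  std::vector<int> Mask = {0, 1, 2, 3, 8, 9, 10, 11};
  auto M128 = widenMask(Mask);   // -> {0, 1, 4, 5}: four 128-bit lanes
  auto M256 = widenMask(*M128);  // -> {0, 2}: two 256-bit halves
  // {0, 2} = low half of V1, low half of V2: lowerable as a single
  // 256-bit subvector insert (vinserti64x4 $1) instead of a shuffle.
  std::printf("halves: %d, %d\n", (*M256)[0], (*M256)[1]);
  return 0;
}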
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=290870&r1=290869&r2=290870&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 3 01:00:40 2017
@@ -12590,6 +12590,40 @@ static SDValue lower256BitVectorShuffle(
}
}
+/// \brief Try to lower a vector shuffle as a 256-bit subvector insert.
+static SDValue lowerV2X256VectorShuffle(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+ assert(VT.getScalarSizeInBits() == 64 &&
+ "Unexpected element type size for 256-bit shuffle.");
+
+ // Handling a 256-bit vector here would require VLX, and
+ // lowerV2X128VectorShuffle() is probably the better solution in that case.
+ assert(VT.is512BitVector() && "Unexpected vector size for 512-bit shuffle.");
+
+ assert(Mask.size() == 4 && "Expect mask to already be widened to 128-bit elements.");
+
+ SmallVector<int, 2> WidenedMask;
+ if (!canWidenShuffleElements(Mask, WidenedMask))
+ return SDValue();
+
+ // Check for patterns which can be matched with a single insert of a 256-bit
+ // subvector.
+ bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, WidenedMask, {0, 0});
+ if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, WidenedMask, {0, 2})) {
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+ OnlyUsesV1 ? V1 : V2,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+
+ return SDValue();
+}
+
/// \brief Try to lower a vector shuffle as 128-bit shuffles.
static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
@@ -12605,6 +12639,11 @@ static SDValue lowerV4X128VectorShuffle(
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
+ // See if we can widen even further, to 256-bit elements.
+ if (SDValue Shuf256 = lowerV2X256VectorShuffle(DL, VT, WidenedMask, V1, V2,
+ DAG))
+ return Shuf256;
+
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
// Ensure elements came from the same Op.
int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
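For reference, the node sequence built above (EXTRACT_SUBVECTOR at index 0 plus CONCAT_VECTORS) is what instruction selection then matches as a single vinsertf64x4/vinserti64x4. A sketch of the equivalent operation written with the public AVX-512F intrinsics (illustration only, not part of this patch; assumes compilation with AVX-512F enabled):

#include <immintrin.h>

// Keep the low 256 bits of 'a' and insert the low 256 bits of 'b' as the
// upper half. With AVX-512F this compiles to a single vinsertf64x4 $1.
__m512d insert_upper_half(__m512d a, __m512d b) {
  __m256d blo = _mm512_castpd512_pd256(b); // EXTRACT_SUBVECTOR at index 0
  return _mm512_insertf64x4(a, blo, 1);    // CONCAT of the two halves
}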
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=290870&r1=290869&r2=290870&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Tue Jan 3 01:00:40 2017
@@ -3020,11 +3020,10 @@ declare <8 x double> @llvm.x86.avx512.ma
define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3],zmm1[0,1,2,3]
-; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
@@ -3041,11 +3040,10 @@ declare <8 x i64> @llvm.x86.avx512.mask.
define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3],zmm1[0,1,2,3]
-; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
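The {%k1} and {%k1} {z} forms in the checks above are the merge-masked and zero-masked variants of the same insert. A sketch of how they map to the standard AVX-512F mask intrinsics (illustration only, not from the commit):

#include <immintrin.h>

// Merge-masked form: lanes where k is 0 are taken from 'src'.
__m512i insert_upper_masked(__m512i src, __mmask8 k, __m512i a, __m256i b) {
  return _mm512_mask_inserti64x4(src, k, a, b, 1);
}

// Zero-masked form: lanes where k is 0 are zeroed.
__m512i insert_upper_maskz(__mmask8 k, __m512i a, __m256i b) {
  return _mm512_maskz_inserti64x4(k, a, b, 1);
}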
Modified: llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll?rev=290870&r1=290869&r2=290870&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll Tue Jan 3 01:00:40 2017
@@ -30,9 +30,9 @@ define <8 x i1> @test2(<2 x i1> %a) {
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpmovm2q %k0, %zmm0
-; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,0,1],zmm0[0,1,0,1]
+; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; CHECK-NEXT: vpmovm2q %k0, %zmm1
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: vpmovq2m %zmm0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll?rev=290870&r1=290869&r2=290870&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll Tue Jan 3 01:00:40 2017
@@ -58,11 +58,10 @@ declare <16 x float> @llvm.x86.avx512.ma
define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm3 = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
-; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
@@ -102,11 +101,10 @@ declare <16 x i32> @llvm.x86.avx512.mask
define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm3 = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
-; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
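vinsertf32x8/vinserti32x8 are the AVX-512DQ counterparts with 32-bit element granularity. A corresponding intrinsics sketch (illustration only; assumes AVX-512DQ is available):

#include <immintrin.h>

// Keep the low 256 bits of 'a', insert the low 256 bits of 'b' as the
// upper half. With AVX-512DQ this is a single vinsertf32x8 $1.
__m512 insertf32x8_upper(__m512 a, __m512 b) {
  __m256 blo = _mm512_castps512_ps256(b);
  return _mm512_insertf32x8(a, blo, 1);
}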