[llvm] r352999 - [X86][AVX] Support shuffle combining for VBROADCAST with smaller vector sources
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 3 08:51:33 PST 2019
Author: rksimon
Date: Sun Feb 3 08:51:33 2019
New Revision: 352999
URL: http://llvm.org/viewvc/llvm-project?rev=352999&view=rev
Log:
[X86][AVX] Support shuffle combining for VBROADCAST with smaller vector sources
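getTargetShuffleMask can only do this safely if we're extracting the lowest subvector from a vector of the same result type, so handle VBROADCAST from smaller vector sources in getFauxShuffleMask instead, widening the source to the result width by inserting it into an UNDEF vector.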
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=352999&r1=352998&r2=352999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Feb 3 08:51:33 2019
@@ -6773,6 +6773,26 @@ static bool getFauxShuffleMask(SDValue N
}
return true;
}
+ case X86ISD::VBROADCAST: {
+ SDValue Src = N.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ if (!SrcVT.isVector())
+ return false;
+
+ if (NumSizeInBits != SrcVT.getSizeInBits()) {
+ assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
+ "Illegal broadcast type");
+ SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ NumSizeInBits / SrcVT.getScalarSizeInBits());
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
+ DAG.getUNDEF(SrcVT), Src,
+ DAG.getIntPtrConstant(0, SDLoc(N)));
+ }
+
+ Ops.push_back(Src);
+ Mask.append(NumElts, 0);
+ return true;
+ }
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND: {
SDValue Src = N.getOperand(0);
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=352999&r1=352998&r2=352999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Sun Feb 3 08:51:33 2019
@@ -2174,8 +2174,7 @@ define <4 x i64> @test_masked_8xi64_to_4
; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask7:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,7]
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,4]
; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT: vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
@@ -2189,9 +2188,8 @@ define <4 x i64> @test_masked_8xi64_to_4
define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT: vpbroadcastq %xmm2, %ymm3
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,7]
+; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,4]
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
@@ -3806,9 +3804,8 @@ define <4 x double> @test_masked_z_8xdou
define <4 x double> @test_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec) {
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
-; CHECK-NEXT: vbroadcastsd %xmm1, %ymm2
-; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,7]
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
+; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,4]
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -3819,8 +3816,7 @@ define <4 x double> @test_masked_8xdoubl
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT: vbroadcastsd %xmm3, %ymm3
-; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,7]
+; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,4]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
@@ -3835,9 +3831,8 @@ define <4 x double> @test_masked_8xdoubl
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT: vbroadcastsd %xmm2, %ymm3
-; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,7]
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
+; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,4]
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll?rev=352999&r1=352998&r2=352999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll Sun Feb 3 08:51:33 2019
@@ -190,9 +190,10 @@ define <4 x i64> @expand4(<2 x i64> %a )
define <8 x float> @expand5(<4 x float> %a ) {
; SKX64-LABEL: expand5:
; SKX64: # %bb.0:
-; SKX64-NEXT: vbroadcastss %xmm0, %ymm0
+; SKX64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; SKX64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; SKX64-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
+; SKX64-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; SKX64-NEXT: retq
;
; KNL64-LABEL: expand5:
@@ -204,9 +205,10 @@ define <8 x float> @expand5(<4 x float>
;
; SKX32-LABEL: expand5:
; SKX32: # %bb.0:
-; SKX32-NEXT: vbroadcastss %xmm0, %ymm0
+; SKX32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; SKX32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; SKX32-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
+; SKX32-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; SKX32-NEXT: retl
;
; KNL32-LABEL: expand5:
More information about the llvm-commits mailing list