[llvm] r352994 - [X86][AVX] More aggressively simplify BROADCAST source operand
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 3 06:39:42 PST 2019
Author: rksimon
Date: Sun Feb 3 06:39:41 2019
New Revision: 352994
URL: http://llvm.org/viewvc/llvm-project?rev=352994&view=rev
Log:
[X86][AVX] More aggressively simplify BROADCAST source operand
Aim to use scalar source or lowest 128-bit vector directly.
We're still missing some VZMOVL_LOAD combines.
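For illustration, a minimal IR sketch (hypothetical function name, mirroring the
<2 x i32> splat tests updated below) of the pattern this helps: with the combine,
the 512-bit broadcast can read straight from memory (vbroadcastsd (%rdi), %zmm0)
instead of a scalar vmovsd load followed by a vbroadcastsd from the xmm register.

; splat a loaded <2 x i32> (i.e. a single 64-bit element) across a 512-bit vector
define <16 x i32> @splat_2xi32_sketch(<2 x i32>* %p) {
  %v = load <2 x i32>, <2 x i32>* %p
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i32> %splat
}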
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll
llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=352994&r1=352993&r2=352994&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Feb 3 06:39:41 2019
@@ -32004,12 +32004,13 @@ static SDValue combineTargetShuffle(SDVa
switch (Opcode) {
case X86ISD::VBROADCAST: {
- // If broadcasting from another shuffle, attempt to simplify it.
- // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
SDValue Src = N.getOperand(0);
SDValue BC = peekThroughBitcasts(Src);
EVT SrcVT = Src.getValueType();
EVT BCVT = BC.getValueType();
+
+ // If broadcasting from another shuffle, attempt to simplify it.
+ // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
if (isTargetShuffle(BC.getOpcode()) &&
VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {
unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
@@ -32023,6 +32024,7 @@ static SDValue combineTargetShuffle(SDVa
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
+
// broadcast(bitcast(src)) -> bitcast(broadcast(src))
// 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
if (Src.getOpcode() == ISD::BITCAST &&
@@ -32031,6 +32033,16 @@ static SDValue combineTargetShuffle(SDVa
VT.getVectorNumElements());
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
}
+
+ // Reduce broadcast source vector to lowest 128-bits.
+ if (SrcVT.getSizeInBits() > 128)
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+ extract128BitVector(Src, 0, DAG, DL));
+
+ // broadcast(scalar_to_vector(x)) -> broadcast(x).
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+
return SDValue();
}
case X86ISD::PSHUFD:
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll?rev=352994&r1=352993&r2=352994&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll Sun Feb 3 06:39:41 2019
@@ -536,8 +536,7 @@ define <8 x i32> @test_masked_z_2xi32_to
define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
; CHECK-LABEL: test_2xi32_to_16xi32_mem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -546,10 +545,8 @@ define <16 x i32> @test_2xi32_to_16xi32_
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -561,10 +558,8 @@ define <16 x i32> @test_masked_2xi32_to_
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -575,10 +570,8 @@ define <16 x i32> @test_masked_z_2xi32_t
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -590,10 +583,8 @@ define <16 x i32> @test_masked_2xi32_to_
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -604,10 +595,8 @@ define <16 x i32> @test_masked_z_2xi32_t
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -619,10 +608,8 @@ define <16 x i32> @test_masked_2xi32_to_
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -633,10 +620,8 @@ define <16 x i32> @test_masked_z_2xi32_t
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -648,10 +633,8 @@ define <16 x i32> @test_masked_2xi32_to_
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
Modified: llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll?rev=352994&r1=352993&r2=352994&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll Sun Feb 3 06:39:41 2019
@@ -1598,8 +1598,7 @@ define <16 x i32> @test_2xi32_to_16xi32_
; X32-AVX512-LABEL: test_2xi32_to_16xi32_mem:
; X32-AVX512: # %bb.0:
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX512-NEXT: vbroadcastsd %xmm0, %zmm0
+; X32-AVX512-NEXT: vbroadcastsd (%eax), %zmm0
; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_2xi32_to_16xi32_mem:
@@ -1610,8 +1609,7 @@ define <16 x i32> @test_2xi32_to_16xi32_
;
; X64-AVX512-LABEL: test_2xi32_to_16xi32_mem:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-AVX512-NEXT: vbroadcastsd %xmm0, %zmm0
+; X64-AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
; X64-AVX512-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=352994&r1=352993&r2=352994&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Sun Feb 3 06:39:41 2019
@@ -141,7 +141,6 @@ define <16 x i8> @combine_pshufb_as_vpbr
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -165,7 +164,6 @@ define <16 x i8> @combine_pshufb_as_vpbr
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -197,14 +195,12 @@ define <16 x i8> @combine_pshufb_as_vpbr
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; X86-LABEL: combine_permd_as_vpbroadcastd256:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vpbroadcastd %xmm0, %ymm0
; X86-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_as_vpbroadcastd256:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vpbroadcastd %xmm0, %ymm0
; X64-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
@@ -226,14 +222,12 @@ define <16 x i8> @combine_pshufb_as_vpbr
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; X86-LABEL: combine_permd_as_vpbroadcastq256:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vpbroadcastq %xmm0, %ymm0
; X86-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_as_vpbroadcastq256:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vpbroadcastq %xmm0, %ymm0
; X64-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
@@ -257,7 +251,6 @@ define <4 x float> @combine_pshufb_as_vp
define <8 x float> @combine_permps_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permps_as_vpbroadcastss256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -268,7 +261,6 @@ define <8 x float> @combine_permps_as_vp
define <4 x double> @combine_permps_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permps_as_vpbroadcastsd256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -313,7 +305,6 @@ define <4 x float> @combine_vpbroadcast_
define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastss256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> zeroinitializer
@@ -324,7 +315,6 @@ define <8 x float> @combine_vpbroadcast_
define <4 x double> @combine_vpbroadcast_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastsd256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> zeroinitializer
@@ -665,7 +655,8 @@ define <16 x i8> @combine_pshufb_inserti
define <8 x i32> @combine_permd_insertion_as_broadcast_v4i64(i64 %a0) {
; X86-LABEL: combine_permd_insertion_as_broadcast_v4i64:
; X86: # %bb.0:
-; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vbroadcastsd %xmm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_insertion_as_broadcast_v4i64:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=352994&r1=352993&r2=352994&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Sun Feb 3 06:39:41 2019
@@ -975,13 +975,13 @@ define <16 x float> @combine_vpermi2var_
define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) {
; X86-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
; X86: # %bb.0:
-; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %zmm0
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vbroadcastsd %xmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
; X64: # %bb.0:
-; X64-NEXT: vmovq %rdi, %xmm0
-; X64-NEXT: vpbroadcastq %xmm0, %zmm0
+; X64-NEXT: vpbroadcastq %rdi, %zmm0
; X64-NEXT: retq
%1 = insertelement <8 x i64> undef, i64 %a0, i32 0
%2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer)
Modified: llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widened-broadcast.ll?rev=352994&r1=352993&r2=352994&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widened-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widened-broadcast.ll Sun Feb 3 06:39:41 2019
@@ -630,8 +630,7 @@ define <16 x i32> @load_splat_16i32_2i32
;
; AVX512-LABEL: load_splat_16i32_2i32_0101:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vbroadcastsd %xmm0, %zmm0
+; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
; AVX512-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>