[llvm] r320933 - [X86][AVX] lowerVectorShuffleAsBroadcast - aggressively peek through BITCASTs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 16 15:32:18 PST 2017
Author: rksimon
Date: Sat Dec 16 15:32:18 2017
New Revision: 320933
URL: http://llvm.org/viewvc/llvm-project?rev=320933&view=rev
Log:
[X86][AVX] lowerVectorShuffleAsBroadcast - aggressively peek through BITCASTs
If we can safely adjust the broadcast index for the new type to keep it suitably aligned, peek through BITCASTs when looking for the broadcast source.
Fixes PR32007
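To make the index adjustment concrete: when a bitcast changes the element width, the broadcast index must be rescaled so it still refers to the same bits, and the rescale is only legal when the bit offset stays element-aligned. Below is a minimal standalone sketch of that arithmetic; the helper name is hypothetical and this is not LLVM API, the real logic operates on BroadcastIdx in lowerVectorShuffleAsBroadcast in the patch below.

#include <cstdio>

// Rescale a broadcast element index when reinterpreting a vector from
// FromBits-wide elements to ToBits-wide elements. Returns false when the
// index would not be suitably aligned in the new type, mirroring the
// bail-out in the patch.
static bool rescaleIndex(unsigned Idx, unsigned FromBits, unsigned ToBits,
                         unsigned &NewIdx) {
  if ((FromBits % ToBits) == 0) {          // wider -> narrower elements
    NewIdx = Idx * (FromBits / ToBits);
    return true;
  }
  if ((ToBits % FromBits) == 0 &&          // narrower -> wider elements,
      (Idx % (ToBits / FromBits)) == 0) {  // only if the offset is aligned
    NewIdx = Idx / (ToBits / FromBits);
    return true;
  }
  return false;                            // misaligned: give up
}

int main() {
  unsigned NewIdx;
  // v4i64 index 1 seen as v8i32: element 1 becomes element 2.
  if (rescaleIndex(1, 64, 32, NewIdx))
    std::printf("64->32: new index %u\n", NewIdx);  // prints 2
  // v8i32 index 2 seen as v4i64: aligned, becomes element 1.
  if (rescaleIndex(2, 32, 64, NewIdx))
    std::printf("32->64: new index %u\n", NewIdx);  // prints 1
  // v8i32 index 3 seen as v4i64: bit offset 96 is not 64-bit aligned.
  if (!rescaleIndex(3, 32, 64, NewIdx))
    std::printf("32->64 with index 3: rejected\n");
  return 0;
}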
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll
llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=320933&r1=320932&r2=320933&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec 16 15:32:18 2017
@@ -10346,9 +10346,16 @@ static SDValue lowerVectorShuffleAsBroad
   for (;;) {
     switch (V.getOpcode()) {
     case ISD::BITCAST: {
+      // Peek through bitcasts as long as BroadcastIdx can be adjusted.
       SDValue VSrc = V.getOperand(0);
-      MVT SrcVT = VSrc.getSimpleValueType();
-      if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
+      unsigned NumEltBits = V.getScalarValueSizeInBits();
+      unsigned NumSrcBits = VSrc.getScalarValueSizeInBits();
+      if ((NumEltBits % NumSrcBits) == 0)
+        BroadcastIdx *= (NumEltBits / NumSrcBits);
+      else if ((NumSrcBits % NumEltBits) == 0 &&
+               (BroadcastIdx % (NumSrcBits / NumEltBits)) == 0)
+        BroadcastIdx /= (NumSrcBits / NumEltBits);
+      else
         break;
       V = VSrc;
       continue;
@@ -10380,6 +10387,23 @@ static SDValue lowerVectorShuffleAsBroad
     break;
   }
 
+  // Ensure the source vector and BroadcastIdx are for a suitable type.
+  if (VT.getScalarSizeInBits() != V.getScalarValueSizeInBits()) {
+    unsigned NumEltBits = VT.getScalarSizeInBits();
+    unsigned NumSrcBits = V.getScalarValueSizeInBits();
+    if ((NumSrcBits % NumEltBits) == 0)
+      BroadcastIdx *= (NumSrcBits / NumEltBits);
+    else if ((NumEltBits % NumSrcBits) == 0 &&
+             (BroadcastIdx % (NumEltBits / NumSrcBits)) == 0)
+      BroadcastIdx /= (NumEltBits / NumSrcBits);
+    else
+      return SDValue();
+
+    unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
+    MVT SrcVT = MVT::getVectorVT(VT.getScalarType(), NumSrcElts);
+    V = DAG.getBitcast(SrcVT, V);
+  }
+
   // Check if this is a broadcast of a scalar. We special case lowering
   // for scalars so that we can more effectively fold with loads.
   // First, look through bitcast: if the original value has a larger element
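The test changes below follow directly from that rescaling. For example, in test_2xfloat_to_8xfloat_mem the shuffle repeats 32-bit float elements 0 and 1; viewed through a bitcast to 64-bit elements, that is a broadcast of element 0, hence the single vbroadcastsd from memory. A small standalone check of that equivalence (illustrative only, not part of the patch):

#include <cstdio>

int main() {
  // Shuffle mask from test_2xfloat_to_8xfloat_mem (f32 elements).
  const unsigned Mask[8] = {0, 1, 0, 1, 0, 1, 0, 1};

  // Each 64-bit lane covers f32 lanes {2k, 2k+1}; the shuffle is a
  // 64-bit broadcast of element 0 iff every lane reads the f32 pair {0, 1}.
  bool IsF64Broadcast = true;
  for (unsigned Lane = 0; Lane != 4; ++Lane)
    IsF64Broadcast &= (Mask[2 * Lane] == 0 && Mask[2 * Lane + 1] == 1);

  std::printf("vbroadcastsd applies: %s\n", IsF64Broadcast ? "yes" : "no");
  return 0;
}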
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll?rev=320933&r1=320932&r2=320933&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll Sat Dec 16 15:32:18 2017
@@ -579,8 +579,7 @@ define <8 x double> @test_masked_z_4xdou
define <8 x float> @test_2xfloat_to_8xfloat_mem(<2 x float>* %vp) {
; CHECK-LABEL: test_2xfloat_to_8xfloat_mem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -589,10 +588,9 @@ define <8 x float> @test_2xfloat_to_8xfl
define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -604,10 +602,9 @@ define <8 x float> @test_masked_2xfloat_
define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -618,10 +615,9 @@ define <8 x float> @test_masked_z_2xfloa
define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -633,10 +629,9 @@ define <8 x float> @test_masked_2xfloat_
define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -647,10 +642,9 @@ define <8 x float> @test_masked_z_2xfloa
define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -662,10 +656,9 @@ define <8 x float> @test_masked_2xfloat_
define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -676,10 +669,9 @@ define <8 x float> @test_masked_z_2xfloa
define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -691,10 +683,9 @@ define <8 x float> @test_masked_2xfloat_
define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -705,8 +696,7 @@ define <8 x float> @test_masked_z_2xfloa
define <16 x float> @test_2xfloat_to_16xfloat_mem(<2 x float>* %vp) {
; CHECK-LABEL: test_2xfloat_to_16xfloat_mem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -715,10 +705,9 @@ define <16 x float> @test_2xfloat_to_16x
define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -730,10 +719,9 @@ define <16 x float> @test_masked_2xfloat
define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -744,10 +732,9 @@ define <16 x float> @test_masked_z_2xflo
define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -759,10 +746,9 @@ define <16 x float> @test_masked_2xfloat
define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -773,10 +759,9 @@ define <16 x float> @test_masked_z_2xflo
define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -788,10 +773,9 @@ define <16 x float> @test_masked_2xfloat
define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -802,10 +786,9 @@ define <16 x float> @test_masked_z_2xflo
define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -817,10 +800,9 @@ define <16 x float> @test_masked_2xfloat
define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
-; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
%shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
Modified: llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widened-broadcast.ll?rev=320933&r1=320932&r2=320933&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widened-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widened-broadcast.ll Sat Dec 16 15:32:18 2017
@@ -121,21 +121,10 @@ define <8 x i32> @load_splat_8i32_4i32_0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_8i32_4i32_01010101:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_8i32_4i32_01010101:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_8i32_4i32_01010101:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_8i32_4i32_01010101:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -257,21 +246,10 @@ define <16 x i16> @load_splat_16i16_8i16
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_16i16_8i16_0123012301230123:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <8 x i16>, <8 x i16>* %ptr
%ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -468,21 +446,10 @@ define <32 x i8> @load_splat_32i8_16i8_0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <16 x i8>, <16 x i8>* %ptr
%ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>