[llvm] r353198 - [X86][SSE] Disable ZERO_EXTEND shuffle combining
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 5 11:15:48 PST 2019
Author: rksimon
Date: Tue Feb 5 11:15:48 2019
New Revision: 353198
URL: http://llvm.org/viewvc/llvm-project?rev=353198&view=rev
Log:
[X86][SSE] Disable ZERO_EXTEND shuffle combining
rL352997 enabled ZERO_EXTEND from non-shuffle-able value types. I've disabled it for now to fix a regression identified by @asbirlea until I can fix this properly.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=353198&r1=353197&r2=353198&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Feb 5 11:15:48 2019
@@ -6793,8 +6793,8 @@ static bool getFauxShuffleMask(SDValue N
Mask.append(NumElts, 0);
return true;
}
- case ISD::ZERO_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND: {
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ // TODO: Handle ISD::ZERO_EXTEND
SDValue Src = N.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=353198&r1=353197&r2=353198&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Tue Feb 5 11:15:48 2019
@@ -1526,8 +1526,9 @@ define <8 x i32> @shuffle_v8i32_08192a3b
;
; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
-; AVX512VL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
+; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i32> %shuffle
@@ -1571,23 +1572,11 @@ define <8 x i32> @shuffle_v8i32_091b2d3f
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v8i32_091b2d3f:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX2-NEXT: retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i32_091b2d3f:
-; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX512VL-SLOW-NEXT: retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i32_091b2d3f:
-; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
-; AVX512VL-FAST-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
-; AVX512VL-FAST-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_091b2d3f:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x i32> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=353198&r1=353197&r2=353198&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Tue Feb 5 11:15:48 2019
@@ -760,3 +760,29 @@ entry:
%shuf2 = shufflevector <8 x float> %inp1, <8 x float> %shuf1, <8 x i32> <i32 15, i32 10, i32 7, i32 2, i32 12, i32 undef, i32 3, i32 2>
ret <8 x float> %shuf2
}
+
+define void @packss_zext_v8i1() {
+; X86-LABEL: packss_zext_v8i1:
+; X86: # %bb.0:
+; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovups %ymm0, (%eax)
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-LABEL: packss_zext_v8i1:
+; X64: # %bb.0:
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovups %ymm0, (%rax)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+ %tmp0 = icmp sgt <8 x i32> undef, undef
+ %tmp1 = zext <8 x i1> %tmp0 to <8 x i32>
+ %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp3 = trunc <16 x i32> %tmp2 to <16 x i16>
+ %tmp4 = add <16 x i16> zeroinitializer, %tmp3
+ %tmp6 = sext <16 x i16> %tmp4 to <16 x i32>
+ %tmp10 = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+ %tmp11 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> %tmp10)
+ store <16 x i16> %tmp11, <16 x i16>* undef, align 2
+ ret void
+}
More information about the llvm-commits
mailing list