[llvm] r350707 - [X86] Add extra test coverage for combining shuffles to PACKSS/PACKUS

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 9 04:34:10 PST 2019


Author: rksimon
Date: Wed Jan  9 04:34:10 2019
New Revision: 350707

URL: http://llvm.org/viewvc/llvm-project?rev=350707&view=rev
Log:
[X86] Add extra test coverage for combining shuffles to PACKSS/PACKUS

Modified:
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=350707&r1=350706&r2=350707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Wed Jan  9 04:34:10 2019
@@ -611,6 +611,44 @@ define <32 x i8> @shuffle_combine_packus
 }
 declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
 
+define <32 x i8> @combine_pshufb_as_packsswb(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; ashr + two byte shuffles + or; judging by the name, a candidate for folding into a single signed word-to-byte pack
+; CHECK-LABEL: combine_pshufb_as_packsswb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsraw $11, %ymm0, %ymm0
+; CHECK-NEXT:    vpsraw $11, %ymm1, %ymm1
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,18,20,22,24,26,28,30]
+; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %1 = ashr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; arithmetic shift by 11 leaves every i16 element in [-16, 15]
+  %2 = ashr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; same shift applied to the second input
+  %3 = bitcast <16 x i16> %1 to <32 x i8>
+  %4 = bitcast <16 x i16> %2 to <32 x i8>
+  %5 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) ; in each 16-byte half: even-indexed source bytes go to result bytes 0-7; the -1 selectors appear as "zero" in the expected asm
+  %6 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %4, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>) ; same selection, but placed in result bytes 8-15 of each half
+  %7 = or <32 x i8> %5, %6 ; each operand is zero where the other holds data, so the or merges the two halves
+  ret <32 x i8> %7
+}
+
+define <32 x i8> @combine_pshufb_as_packuswb(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; lshr + two byte shuffles + or; judging by the name, a candidate for folding into a single unsigned word-to-byte pack
+; CHECK-LABEL: combine_pshufb_as_packuswb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsrlw $11, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrlw $11, %ymm1, %ymm1
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,18,20,22,24,26,28,30]
+; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %1 = lshr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; logical shift by 11 leaves every i16 element in [0, 31]
+  %2 = lshr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; same shift applied to the second input
+  %3 = bitcast <16 x i16> %1 to <32 x i8>
+  %4 = bitcast <16 x i16> %2 to <32 x i8>
+  %5 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) ; in each 16-byte half: even-indexed source bytes go to result bytes 0-7; the -1 selectors appear as "zero" in the expected asm
+  %6 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %4, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>) ; same selection, but placed in result bytes 8-15 of each half
+  %7 = or <32 x i8> %5, %6 ; each operand is zero where the other holds data, so the or merges the two halves
+  ret <32 x i8> %7
+}
+
 define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) {
 ; X86-LABEL: combine_pshufb_insertion_as_broadcast_v2i64:
 ; X86:       # %bb.0:

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=350707&r1=350706&r2=350707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Wed Jan  9 04:34:10 2019
@@ -824,6 +824,62 @@ define <32 x i16> @combine_pshufb_as_psh
   ret <32 x i16> %1
 }
 
+define <64 x i8> @combine_pshufb_as_packsswb(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; 512-bit form: ashr + two byte shuffles + or; judging by the name, a candidate for folding into a single signed word-to-byte pack
+; X32-LABEL: combine_pshufb_as_packsswb:
+; X32:       # %bb.0:
+; X32-NEXT:    vpsraw $11, %zmm0, %zmm0
+; X32-NEXT:    vpsraw $11, %zmm1, %zmm1
+; X32-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X32-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_pshufb_as_packsswb:
+; X64:       # %bb.0:
+; X64-NEXT:    vpsraw $11, %zmm0, %zmm0
+; X64-NEXT:    vpsraw $11, %zmm1, %zmm1
+; X64-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X64-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %1 = ashr <32 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; arithmetic shift by 11 leaves every i16 element in [-16, 15]
+  %2 = ashr <32 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; same shift applied to the second input
+  %3 = bitcast <32 x i16> %1 to <64 x i8>
+  %4 = bitcast <32 x i16> %2 to <64 x i8>
+  %5 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %3, <64 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <64 x i8> undef, i64 -1) ; in each 16-byte quarter: even-indexed source bytes go to result bytes 0-7; -1 selectors appear as "zero" in the expected asm; passthru is undef and the i64 mask has all bits set
+  %6 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %4, <64 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>, <64 x i8> undef, i64 -1) ; same selection, but placed in result bytes 8-15 of each quarter
+  %7 = or <64 x i8> %5, %6 ; each operand is zero where the other holds data, so the or merges the two halves of every quarter
+  ret <64 x i8> %7
+}
+
+define <64 x i8> @combine_pshufb_as_packuswb(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; 512-bit form: lshr + two byte shuffles + or; judging by the name, a candidate for folding into a single unsigned word-to-byte pack
+; X32-LABEL: combine_pshufb_as_packuswb:
+; X32:       # %bb.0:
+; X32-NEXT:    vpsrlw $11, %zmm0, %zmm0
+; X32-NEXT:    vpsrlw $11, %zmm1, %zmm1
+; X32-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X32-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_pshufb_as_packuswb:
+; X64:       # %bb.0:
+; X64-NEXT:    vpsrlw $11, %zmm0, %zmm0
+; X64-NEXT:    vpsrlw $11, %zmm1, %zmm1
+; X64-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X64-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %1 = lshr <32 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; logical shift by 11 leaves every i16 element in [0, 31]
+  %2 = lshr <32 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; same shift applied to the second input
+  %3 = bitcast <32 x i16> %1 to <64 x i8>
+  %4 = bitcast <32 x i16> %2 to <64 x i8>
+  %5 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %3, <64 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <64 x i8> undef, i64 -1) ; in each 16-byte quarter: even-indexed source bytes go to result bytes 0-7; -1 selectors appear as "zero" in the expected asm; passthru is undef and the i64 mask has all bits set
+  %6 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %4, <64 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>, <64 x i8> undef, i64 -1) ; same selection, but placed in result bytes 8-15 of each quarter
+  %7 = or <64 x i8> %5, %6 ; each operand is zero where the other holds data, so the or merges the two halves of every quarter
+  ret <64 x i8> %7
+}
+
 define <32 x i16> @combine_vpermi2var_32i16_as_pshufb(<32 x i16> %a0) {
 ; X32-LABEL: combine_vpermi2var_32i16_as_pshufb:
 ; X32:       # %bb.0:




More information about the llvm-commits mailing list