[llvm] r314628 - [X86][SSE] Add shuffle combining tests with PACKSS/PACKUS
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 1 10:30:44 PDT 2017
Author: rksimon
Date: Sun Oct 1 10:30:44 2017
New Revision: 314628
URL: http://llvm.org/viewvc/llvm-project?rev=314628&view=rev
Log:
[X86][SSE] Add shuffle combining tests with PACKSS/PACKUS
Modified:
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=314628&r1=314627&r2=314628&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Sun Oct 1 10:30:44 2017
@@ -804,6 +804,96 @@ define <32 x i8> @combine_unpack_unpack_
ret <32 x i8> %6
}
+define <16 x i16> @shuffle_combine_packssdw_pshufb(<8 x i32> %a0) { ; Test: shuffle of a PACKSSDW result whose input is an all-sign-bits vector; the checks below expect the shift, the pack and a byte shuffle to remain three separate instructions.
+; X32-LABEL: shuffle_combine_packssdw_pshufb:
+; X32: # BB#0:
+; X32-NEXT: vpsrad $31, %ymm0, %ymm0
+; X32-NEXT: vpackssdw %ymm0, %ymm0, %ymm0
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X32-NEXT: retl
+;
+; X64-LABEL: shuffle_combine_packssdw_pshufb:
+; X64: # BB#0:
+; X64-NEXT: vpsrad $31, %ymm0, %ymm0
+; X64-NEXT: vpackssdw %ymm0, %ymm0, %ymm0
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X64-NEXT: retq
+ %1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> ; arithmetic shift by 31 leaves every i32 lane as 0 or -1
+ %2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %1) ; the same value is fed to both pack operands
+ %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8> ; i16 elements 3..0 twice, then 8..11 in order followed by 11..8; written as a generic shufflevector, the checks show it emitted as vpshufb
+ ret <16 x i16> %3
+}
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <32 x i8> @shuffle_combine_packsswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) { ; Test: byte shuffle of a PACKSSWB result built from two all-sign-bits vectors; the checks below expect both shifts, the pack and the byte shuffle to remain separate instructions.
+; X32-LABEL: shuffle_combine_packsswb_pshufb:
+; X32: # BB#0:
+; X32-NEXT: vpsraw $15, %ymm0, %ymm0
+; X32-NEXT: vpsraw $15, %ymm1, %ymm1
+; X32-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X32-NEXT: retl
+;
+; X64-LABEL: shuffle_combine_packsswb_pshufb:
+; X64: # BB#0:
+; X64-NEXT: vpsraw $15, %ymm0, %ymm0
+; X64-NEXT: vpsraw $15, %ymm1, %ymm1
+; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X64-NEXT: retq
+ %1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; arithmetic shift by 15 leaves every i16 lane of %a0 as 0 or -1
+ %2 = ashr <16 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; same treatment for %a1
+ %3 = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2) ; %1 is the first pack operand, %2 the second
+ %4 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) ; control bytes are 7..0 repeated four times (the expected decode above lists the upper half as 23..16); @llvm.x86.avx2.pshuf.b is not declared in this hunk, presumably declared elsewhere in the test file
+ ret <32 x i8> %4
+}
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) { ; Test: shuffle of a PACKUSDW result whose input has been masked with constants; the checks below expect the constant-pool AND, the pack and a byte shuffle as three separate instructions. NOTE(review): %a1 is never referenced in the body.
+; X32-LABEL: shuffle_combine_packusdw_pshufb:
+; X32: # BB#0:
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X32-NEXT: retl
+;
+; X64-LABEL: shuffle_combine_packusdw_pshufb:
+; X64: # BB#0:
+; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X64-NEXT: retq
+ %1 = and <8 x i32> %a0, <i32 255, i32 65535, i32 255, i32 65535, i32 255, i32 255, i32 255, i32 65535> ; per-lane constant mask of 255 or 65535, so every i32 lane is at most 65535
+ %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1) ; the same value is fed to both pack operands
+ %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8> ; i16 elements 3..0 twice, then 8..11 in order followed by 11..8; written as a generic shufflevector, the checks show it emitted as vpshufb
+ ret <16 x i16> %3
+}
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) { ; Test: byte shuffle of a PACKUSWB result built from two logically right-shifted vectors; the checks below expect both shifts, the pack and the byte shuffle to remain separate instructions.
+; X32-LABEL: shuffle_combine_packuswb_pshufb:
+; X32: # BB#0:
+; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
+; X32-NEXT: vpsrlw $8, %ymm1, %ymm1
+; X32-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X32-NEXT: retl
+;
+; X64-LABEL: shuffle_combine_packuswb_pshufb:
+; X64: # BB#0:
+; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
+; X64-NEXT: vpsrlw $8, %ymm1, %ymm1
+; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X64-NEXT: retq
+ %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ; logical shift by 8 moves the high byte of each i16 lane into the low byte, so every lane is at most 255
+ %2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ; same treatment for %a1
+ %3 = tail call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2) ; %1 is the first pack operand, %2 the second
+ %4 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) ; control bytes are 7..0 repeated four times (the expected decode above lists the upper half as 23..16); @llvm.x86.avx2.pshuf.b is not declared in this hunk, presumably declared elsewhere in the test file
+ ret <32 x i8> %4
+}
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) {
; X32-LABEL: combine_pshufb_insertion_as_broadcast_v2i64:
; X32: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=314628&r1=314627&r2=314628&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Sun Oct 1 10:30:44 2017
@@ -640,6 +640,72 @@ define <8 x i16> @shuffle_combine_unpack
ret <8 x i16> %8
}
+define <16 x i8> @shuffle_combine_packssdw_pshufb(<4 x i32> %a0) { ; Test: byte shuffle of a PACKSSDW result whose input is an all-sign-bits vector; the checks below expect the shift, the pack and the byte shuffle to remain three separate instructions.
+; SSE-LABEL: shuffle_combine_packssdw_pshufb:
+; SSE: # BB#0:
+; SSE-NEXT: psrad $31, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_combine_packssdw_pshufb:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; AVX-NEXT: retq
+ %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31> ; arithmetic shift by 31 leaves every i32 lane as 0 or -1
+ %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %1) ; the same value is fed to both pack operands
+ %3 = bitcast <8 x i16> %2 to <16 x i8> ; reinterpret the eight i16 lanes as sixteen bytes for the byte shuffle
+ %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>) ; control bytes 7..0 then 15..8 reverse the byte order within each 8-byte half; @llvm.x86.ssse3.pshuf.b.128 is not declared in this hunk, presumably declared elsewhere in the test file
+ ret <16 x i8> %4
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @shuffle_combine_packsswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) { ; Test: byte shuffle of a PACKSSWB result built from two all-sign-bits vectors; unlike the sibling pack tests, the checks below expect no pack instruction at all, only the shift of the first argument and a single byte shuffle of it.
+; SSE-LABEL: shuffle_combine_packsswb_pshufb:
+; SSE: # BB#0:
+; SSE-NEXT: psraw $15, %xmm0
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_combine_packsswb_pshufb:
+; AVX: # BB#0:
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0]
+; AVX-NEXT: retq
+ %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; arithmetic shift by 15 leaves every i16 lane of %a0 as 0 or -1
+ %2 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; same treatment for %a1; the expected code above contains no shift of xmm1, so this value does not reach the output
+ %3 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2) ; %1 is the first pack operand, %2 the second
+ %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) ; control bytes are 7..0 twice, so only bytes 0..7 of the pack result are selected; @llvm.x86.ssse3.pshuf.b.128 is not declared in this hunk, presumably declared elsewhere in the test file
+ ret <16 x i8> %4
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) { ; Test: byte shuffle of a PACKUSWB result built from two logically right-shifted vectors; the checks below expect both shifts, the pack and the byte shuffle to remain separate instructions.
+; SSE-LABEL: shuffle_combine_packuswb_pshufb:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_combine_packuswb_pshufb:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ; logical shift by 8 moves the high byte of each i16 lane into the low byte, so every lane is at most 255
+ %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ; same treatment for %a1
+ %3 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2) ; %1 is the first pack operand, %2 the second
+ %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) ; control bytes are 7..0 twice, so only bytes 0..7 of the pack result are selected; @llvm.x86.ssse3.pshuf.b.128 is not declared in this hunk, presumably declared elsewhere in the test file
+ ret <16 x i8> %4
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
define <16 x i8> @constant_fold_pshufb() {
; SSE-LABEL: constant_fold_pshufb:
; SSE: # BB#0:
More information about the llvm-commits
mailing list