[llvm] r307042 - [X86][SSE4A] Add SSE4A shuffle tests on pre-SSSE3 hardware

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 3 09:53:11 PDT 2017


Author: rksimon
Date: Mon Jul  3 09:53:11 2017
New Revision: 307042

URL: http://llvm.org/viewvc/llvm-project?rev=307042&view=rev
Log:
[X86][SSE4A] Add SSE4A shuffle tests on pre-SSSE3 hardware

Modified:
    llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll?rev=307042&r1=307041&r2=307042&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll Mon Jul  3 09:53:11 2017
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AMD10H
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2
 
@@ -36,6 +37,11 @@ define <2 x i64> @extrqi_len32_idx48(<2
 }
 
 define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
+; AMD10H-LABEL: shuf_0zzzuuuuuuuuuuuu:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
@@ -50,6 +56,14 @@ define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(
 }
 
 define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
+; AMD10H-LABEL: shuf_0zzzzzzz1zzzzzzz:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    movdqa %xmm0, %xmm1
+; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    movdqa %xmm0, %xmm1
@@ -67,6 +81,14 @@ define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(
 }
 
 define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
+; AMD10H-LABEL: shuf_2zzzzzzz3zzzzzzz:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    movdqa %xmm0, %xmm1
+; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    movdqa %xmm0, %xmm1
@@ -85,6 +107,11 @@ define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(
 }
 
 define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
+; AMD10H-LABEL: shuf_01zzuuuuuuuuuuuu:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
@@ -99,6 +126,14 @@ define <16 x i8> @shuf_01zzuuuuuuuuuuuu(
 }
 
 define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
+; AMD10H-LABEL: shuf_01zzzzzz23zzzzzz:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    movdqa %xmm0, %xmm1
+; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    movdqa %xmm0, %xmm1
@@ -152,6 +187,14 @@ define <8 x i16> @shuf_012zuuuu(<8 x i16
 }
 
 define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
+; AMD10H-LABEL: shuf_0zzz1zzz:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    movdqa %xmm0, %xmm1
+; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuf_0zzz1zzz:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    movdqa %xmm0, %xmm1
@@ -169,6 +212,12 @@ define <8 x i16> @shuf_0zzz1zzz(<8 x i16
 }
 
 define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
+; AMD10H-LABEL: shuf_0z1z:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    pxor %xmm1, %xmm1
+; AMD10H-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuf_0z1z:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    pxor %xmm1, %xmm1
@@ -303,6 +352,15 @@ define <8 x i16> @shuf_089uuuuu(<8 x i16
 
 ; Out of range.
 define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
+; AMD10H-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AMD10H-NEXT:    andpd {{.*}}(%rip), %xmm0
+; AMD10H-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AMD10H-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; AMD10H-NEXT:    packuswb %xmm0, %xmm0
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    psrld $16, %xmm1
@@ -321,6 +379,13 @@ define <16 x i8> @shuffle_8_18_uuuuuuuuu
 }
 
 define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
+; AMD10H-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AMD10H-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AMD10H-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
@@ -335,6 +400,12 @@ define <16 x i8> @shuffle_uu_0_5_uu_uu_u
 }
 
 define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
+; AMD10H-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AMD10H:       # BB#0:
+; AMD10H-NEXT:    psrlq $16, %xmm0
+; AMD10H-NEXT:    pand {{.*}}(%rip), %xmm0
+; AMD10H-NEXT:    retq
+;
 ; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
 ; BTVER1:       # BB#0:
 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]




More information about the llvm-commits mailing list