[llvm] r324017 - [X86][SSE] Add SSE41 to variable permute tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 1 14:05:44 PST 2018


Author: rksimon
Date: Thu Feb  1 14:05:44 2018
New Revision: 324017

URL: http://llvm.org/viewvc/llvm-project?rev=324017&view=rev
Log:
[X86][SSE] Add SSE41 to variable permute tests

Modified:
    llvm/trunk/test/CodeGen/X86/var-permute-128.ll

Modified: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-128.ll?rev=324017&r1=324016&r2=324017&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll Thu Feb  1 14:05:44 2018
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,XOP
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX2
@@ -22,6 +23,18 @@ define <2 x i64> @var_shuffle_v2i64(<2 x
 ; SSSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: var_shuffle_v2i64:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movq %xmm1, %rax
+; SSE41-NEXT:    andl $1, %eax
+; SSE41-NEXT:    pextrq $1, %xmm1, %rcx
+; SSE41-NEXT:    andl $1, %ecx
+; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: var_shuffle_v2i64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
@@ -49,6 +62,13 @@ define <4 x i32> @var_shuffle_v4i32(<4 x
 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: var_shuffle_v4i32:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    pshufb %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: var_shuffle_v4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
@@ -69,12 +89,12 @@ define <4 x i32> @var_shuffle_v4i32(<4 x
 }
 
 define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
-; SSSE3-LABEL: var_shuffle_v8i16:
-; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    pmullw {{.*}}(%rip), %xmm1
-; SSSE3-NEXT:    paddw {{.*}}(%rip), %xmm1
-; SSSE3-NEXT:    pshufb %xmm1, %xmm0
-; SSSE3-NEXT:    retq
+; SSE-LABEL: var_shuffle_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm1
+; SSE-NEXT:    paddw {{.*}}(%rip), %xmm1
+; SSE-NEXT:    pshufb %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVXNOVLBW-LABEL: var_shuffle_v8i16:
 ; AVXNOVLBW:       # %bb.0:
@@ -115,10 +135,10 @@ define <8 x i16> @var_shuffle_v8i16(<8 x
 }
 
 define <16 x i8> @var_shuffle_v16i8(<16 x i8> %v, <16 x i8> %indices) nounwind {
-; SSSE3-LABEL: var_shuffle_v16i8:
-; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    pshufb %xmm1, %xmm0
-; SSSE3-NEXT:    retq
+; SSE-LABEL: var_shuffle_v16i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pshufb %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: var_shuffle_v16i8:
 ; AVX:       # %bb.0:
@@ -188,6 +208,17 @@ define <2 x double> @var_shuffle_v2f64(<
 ; SSSE3-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: var_shuffle_v2f64:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movq %xmm1, %rax
+; SSE41-NEXT:    andl $1, %eax
+; SSE41-NEXT:    pextrq $1, %xmm1, %rcx
+; SSE41-NEXT:    andl $1, %ecx
+; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: var_shuffle_v2f64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
@@ -215,6 +246,13 @@ define <4 x float> @var_shuffle_v4f32(<4
 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: var_shuffle_v4f32:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    pshufb %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: var_shuffle_v4f32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
@@ -235,10 +273,10 @@ define <4 x float> @var_shuffle_v4f32(<4
 }
 
 define <16 x i8> @var_shuffle_v16i8_from_v16i8_v32i8(<16 x i8> %v, <32 x i8> %indices) nounwind {
-; SSSE3-LABEL: var_shuffle_v16i8_from_v16i8_v32i8:
-; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    pshufb %xmm1, %xmm0
-; SSSE3-NEXT:    retq
+; SSE-LABEL: var_shuffle_v16i8_from_v16i8_v32i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pshufb %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: var_shuffle_v16i8_from_v16i8_v32i8:
 ; AVX:       # %bb.0:
@@ -435,6 +473,97 @@ define <16 x i8> @var_shuffle_v16i8_from
 ; SSSE3-NEXT:    popq %rbp
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pushq %rbp
+; SSE41-NEXT:    movq %rsp, %rbp
+; SSE41-NEXT:    andq $-32, %rsp
+; SSE41-NEXT:    subq $544, %rsp # imm = 0x220
+; SSE41-NEXT:    pextrb $0, %xmm2, %eax
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movaps %xmm0, (%rsp)
+; SSE41-NEXT:    movzbl 480(%rsp,%rax), %eax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    pextrb $1, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $1, 448(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $2, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $2, 416(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $3, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $3, 384(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $4, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $4, 352(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $5, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $5, 320(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $6, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $6, 288(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $7, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $7, 256(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $8, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $8, 224(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $9, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $9, 192(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $10, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $10, 160(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $11, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $11, 128(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $12, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $12, 96(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $13, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $13, 64(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $14, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $14, 32(%rsp,%rax), %xmm0
+; SSE41-NEXT:    pextrb $15, %xmm2, %eax
+; SSE41-NEXT:    andl $31, %eax
+; SSE41-NEXT:    pinsrb $15, (%rsp,%rax), %xmm0
+; SSE41-NEXT:    movq %rbp, %rsp
+; SSE41-NEXT:    popq %rbp
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    pushq %rbp




More information about the llvm-commits mailing list