[llvm] a404bae - [X86][SSE] Add non-SSE41 target PTEST tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 14 04:28:56 PDT 2020


Author: Simon Pilgrim
Date: 2020-06-14T12:23:10+01:00
New Revision: a404bae288c75aafaed4575ecc2fc634664077af

URL: https://github.com/llvm/llvm-project/commit/a404bae288c75aafaed4575ecc2fc634664077af
DIFF: https://github.com/llvm/llvm-project/commit/a404bae288c75aafaed4575ecc2fc634664077af.diff

LOG: [X86][SSE] Add non-SSE41 target PTEST tests

Ensure codegen is still reasonable on targets without PTEST - ideally we'd make use of MOVMSK for this.
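For reference, one possible shape of a MOVMSK-based lowering for the 128-bit all-zeros test (a sketch only - PCMPEQB/PMOVMSKB are real SSE2 instructions, but this exact sequence and the label name are illustrative, not current codegen):

    pxor     %xmm1, %xmm1        # zero vector to compare against
    pcmpeqb  %xmm0, %xmm1        # 0xFF in each byte of %xmm1 where %xmm0 is zero
    pmovmskb %xmm1, %eax         # pack the 16 byte-mask bits into %eax
    cmpl     $65535, %eax        # all 16 bits set <=> %xmm0 is all zero
    jne      .Lnonzero           # taken when any element is nonzero (label illustrative)

This would trade the two MOVQ+PSHUFD GPR transfers in the current SSE2 output below for a single vector compare plus one PMOVMSKB transfer.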

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/ptest.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll
index fe69c60ec68f..605dfc6e2165 100644
--- a/llvm/test/CodeGen/X86/ptest.ll
+++ b/llvm/test/CodeGen/X86/ptest.ll
@@ -1,9 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx < %s | FileCheck %s --check-prefix=SSE41
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s   | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw < %s   | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,-avx | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2   | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
 
 define i32 @veccond128(<4 x i32> %input) {
+; SSE2-LABEL: veccond128:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movq %xmm0, %rax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm0, %rcx
+; SSE2-NEXT:    orq %rax, %rcx
+; SSE2-NEXT:    je .LBB0_2
+; SSE2-NEXT:  # %bb.1: # %if-true-block
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    retq
+; SSE2-NEXT:  .LBB0_2: # %endif-block
+; SSE2-NEXT:    movl $1, %eax
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: veccond128:
 ; SSE41:       # %bb.0: # %entry
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
@@ -36,6 +51,25 @@ endif-block:
 }
 
 define i32 @veccond256(<8 x i32> %input) {
+; SSE2-LABEL: veccond256:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movq %xmm2, %rax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm2, %rcx
+; SSE2-NEXT:    orq %rax, %rcx
+; SSE2-NEXT:    movq %xmm1, %rax
+; SSE2-NEXT:    movq %xmm0, %rdx
+; SSE2-NEXT:    orq %rax, %rdx
+; SSE2-NEXT:    orq %rcx, %rdx
+; SSE2-NEXT:    je .LBB1_2
+; SSE2-NEXT:  # %bb.1: # %if-true-block
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    retq
+; SSE2-NEXT:  .LBB1_2: # %endif-block
+; SSE2-NEXT:    movl $1, %eax
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: veccond256:
 ; SSE41:       # %bb.0: # %entry
 ; SSE41-NEXT:    por %xmm1, %xmm0
@@ -71,6 +105,35 @@ endif-block:
 }
 
 define i32 @veccond512(<16 x i32> %input) {
+; SSE2-LABEL: veccond512:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rcx
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rdx
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rsi
+; SSE2-NEXT:    orq %rdx, %rsi
+; SSE2-NEXT:    orq %rax, %rsi
+; SSE2-NEXT:    orq %rcx, %rsi
+; SSE2-NEXT:    movq %xmm2, %rax
+; SSE2-NEXT:    movq %xmm0, %rcx
+; SSE2-NEXT:    movq %xmm3, %rdx
+; SSE2-NEXT:    movq %xmm1, %rdi
+; SSE2-NEXT:    orq %rdx, %rdi
+; SSE2-NEXT:    orq %rax, %rdi
+; SSE2-NEXT:    orq %rcx, %rdi
+; SSE2-NEXT:    orq %rsi, %rdi
+; SSE2-NEXT:    je .LBB2_2
+; SSE2-NEXT:  # %bb.1: # %if-true-block
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    retq
+; SSE2-NEXT:  .LBB2_2: # %endif-block
+; SSE2-NEXT:    movl $1, %eax
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: veccond512:
 ; SSE41:       # %bb.0: # %entry
 ; SSE41-NEXT:    por %xmm3, %xmm1
@@ -139,6 +202,16 @@ endif-block:
 }
 
 define i32 @vectest128(<4 x i32> %input) {
+; SSE2-LABEL: vectest128:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq %xmm0, %rcx
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm0, %rdx
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    orq %rcx, %rdx
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: vectest128:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    xorl %eax, %eax
@@ -159,6 +232,21 @@ define i32 @vectest128(<4 x i32> %input) {
 }
 
 define i32 @vectest256(<8 x i32> %input) {
+; SSE2-LABEL: vectest256:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movq %xmm2, %rax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm2, %rcx
+; SSE2-NEXT:    orq %rax, %rcx
+; SSE2-NEXT:    movq %xmm1, %rax
+; SSE2-NEXT:    movq %xmm0, %rdx
+; SSE2-NEXT:    orq %rax, %rdx
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    orq %rcx, %rdx
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: vectest256:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    por %xmm1, %xmm0
@@ -181,6 +269,31 @@ define i32 @vectest256(<8 x i32> %input) {
 }
 
 define i32 @vectest512(<16 x i32> %input) {
+; SSE2-LABEL: vectest512:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rcx
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rdx
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rsi
+; SSE2-NEXT:    orq %rdx, %rsi
+; SSE2-NEXT:    orq %rax, %rsi
+; SSE2-NEXT:    orq %rcx, %rsi
+; SSE2-NEXT:    movq %xmm2, %rax
+; SSE2-NEXT:    movq %xmm0, %rcx
+; SSE2-NEXT:    movq %xmm3, %rdx
+; SSE2-NEXT:    movq %xmm1, %rdi
+; SSE2-NEXT:    orq %rdx, %rdi
+; SSE2-NEXT:    orq %rax, %rdi
+; SSE2-NEXT:    orq %rcx, %rdi
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    orq %rsi, %rdi
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: vectest512:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    por %xmm3, %xmm1
@@ -231,6 +344,16 @@ define i32 @vectest512(<16 x i32> %input) {
 }
 
 define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) {
+; SSE2-LABEL: vecsel128:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movl %edi, %eax
+; SSE2-NEXT:    movq %xmm0, %rcx
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm0, %rdx
+; SSE2-NEXT:    orq %rcx, %rdx
+; SSE2-NEXT:    cmovel %esi, %eax
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: vecsel128:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movl %edi, %eax
@@ -251,6 +374,21 @@ define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) {
 }
 
 define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) {
+; SSE2-LABEL: vecsel256:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movl %edi, %eax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movq %xmm2, %rcx
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm2, %rdx
+; SSE2-NEXT:    orq %rcx, %rdx
+; SSE2-NEXT:    movq %xmm1, %rcx
+; SSE2-NEXT:    movq %xmm0, %rdi
+; SSE2-NEXT:    orq %rcx, %rdi
+; SSE2-NEXT:    orq %rdx, %rdi
+; SSE2-NEXT:    cmovel %esi, %eax
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: vecsel256:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movl %edi, %eax
@@ -273,6 +411,31 @@ define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) {
 }
 
 define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
+; SSE2-LABEL: vecsel512:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movl %edi, %eax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %r8
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rdx
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rdi
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movq %xmm4, %rcx
+; SSE2-NEXT:    orq %rdi, %rcx
+; SSE2-NEXT:    orq %r8, %rcx
+; SSE2-NEXT:    orq %rdx, %rcx
+; SSE2-NEXT:    movq %xmm2, %r9
+; SSE2-NEXT:    movq %xmm0, %r8
+; SSE2-NEXT:    movq %xmm3, %rdi
+; SSE2-NEXT:    movq %xmm1, %rdx
+; SSE2-NEXT:    orq %rdi, %rdx
+; SSE2-NEXT:    orq %r9, %rdx
+; SSE2-NEXT:    orq %r8, %rdx
+; SSE2-NEXT:    orq %rcx, %rdx
+; SSE2-NEXT:    cmovel %esi, %eax
+; SSE2-NEXT:    retq
+;
 ; SSE41-LABEL: vecsel512:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movl %edi, %eax
