[llvm] 56656f8 - [X86] Add SSE2 test coverage to vector comparison all_of/any_of tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 30 06:01:08 PST 2023


Author: Simon Pilgrim
Date: 2023-01-30T14:00:53Z
New Revision: 56656f8378c842ef83af2ec7606e185d6fdefaf1

URL: https://github.com/llvm/llvm-project/commit/56656f8378c842ef83af2ec7606e185d6fdefaf1
DIFF: https://github.com/llvm/llvm-project/commit/56656f8378c842ef83af2ec7606e185d6fdefaf1.diff

LOG: [X86] Add SSE2 test coverage to vector comparison all_of/any_of tests

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/vector-compare-all_of.ll
    llvm/test/CodeGen/X86/vector-compare-any_of.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index bbc6757c9a699..581f1d71aa341 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2   | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx    | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512
 
 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_v2f64_sext:
@@ -272,15 +273,33 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
 }
 
 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
-; SSE-LABEL: test_v2i64_sext:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE-NEXT:    movmskpd %xmm0, %ecx
-; SSE-NEXT:    xorl %eax, %eax
-; SSE-NEXT:    cmpl $3, %ecx
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    negq %rax
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_v2i64_sext:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pand %xmm2, %xmm1
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %ecx
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    cmpl $3, %ecx
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    negq %rax
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: test_v2i64_sext:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT:    movmskpd %xmm0, %ecx
+; SSE42-NEXT:    xorl %eax, %eax
+; SSE42-NEXT:    cmpl $3, %ecx
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    negq %rax
+; SSE42-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i64_sext:
 ; AVX:       # %bb.0:
@@ -310,17 +329,44 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
 }
 
 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
-; SSE-LABEL: test_v4i64_sext:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
-; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
-; SSE-NEXT:    pand %xmm1, %xmm0
-; SSE-NEXT:    movmskpd %xmm0, %ecx
-; SSE-NEXT:    xorl %eax, %eax
-; SSE-NEXT:    cmpl $3, %ecx
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    negq %rax
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_v4i64_sext:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE2-NEXT:    pand %xmm5, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    por %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm3, %xmm2
+; SSE2-NEXT:    movmskpd %xmm2, %ecx
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    cmpl $3, %ecx
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    negq %rax
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: test_v4i64_sext:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
+; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT:    pand %xmm1, %xmm0
+; SSE42-NEXT:    movmskpd %xmm0, %ecx
+; SSE42-NEXT:    xorl %eax, %eax
+; SSE42-NEXT:    cmpl $3, %ecx
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    negq %rax
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v4i64_sext:
 ; AVX1:       # %bb.0:
@@ -369,17 +415,44 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
 }
 
 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
-; SSE-LABEL: test_v4i64_legal_sext:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
-; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
-; SSE-NEXT:    packssdw %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %ecx
-; SSE-NEXT:    xorl %eax, %eax
-; SSE-NEXT:    cmpl $15, %ecx
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    negq %rax
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_v4i64_legal_sext:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE2-NEXT:    pand %xmm5, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    por %xmm0, %xmm2
+; SSE2-NEXT:    packssdw %xmm3, %xmm2
+; SSE2-NEXT:    movmskps %xmm2, %ecx
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    cmpl $15, %ecx
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    negq %rax
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: test_v4i64_legal_sext:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
+; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT:    packssdw %xmm1, %xmm0
+; SSE42-NEXT:    movmskps %xmm0, %ecx
+; SSE42-NEXT:    xorl %eax, %eax
+; SSE42-NEXT:    cmpl $15, %ecx
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    negq %rax
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v4i64_legal_sext:
 ; AVX1:       # %bb.0:
@@ -1080,16 +1153,32 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
 }
 
 define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
-; SSE-LABEL: bool_reduction_v2i64:
-; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; SSE-NEXT:    pxor %xmm2, %xmm1
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE-NEXT:    movmskpd %xmm0, %eax
-; SSE-NEXT:    cmpb $3, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v2i64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pand %xmm2, %xmm1
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    cmpb $3, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v2i64:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT:    pxor %xmm2, %xmm1
+; SSE42-NEXT:    pxor %xmm2, %xmm0
+; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT:    movmskpd %xmm0, %eax
+; SSE42-NEXT:    cmpb $3, %al
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    retq
 ;
 ; AVX-LABEL: bool_reduction_v2i64:
 ; AVX:       # %bb.0:
@@ -1225,15 +1314,40 @@ define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
 }
 
 define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
-; SSE-LABEL: bool_reduction_v4i64:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpgtq %xmm1, %xmm3
-; SSE-NEXT:    pcmpgtq %xmm0, %xmm2
-; SSE-NEXT:    packssdw %xmm3, %xmm2
-; SSE-NEXT:    movmskps %xmm2, %eax
-; SSE-NEXT:    cmpb $15, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v4i64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2]
+; SSE2-NEXT:    pand %xmm5, %xmm1
+; SSE2-NEXT:    por %xmm3, %xmm1
+; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
+; SSE2-NEXT:    pand %xmm3, %xmm0
+; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    cmpb $15, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v4i64:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pcmpgtq %xmm1, %xmm3
+; SSE42-NEXT:    pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT:    packssdw %xmm3, %xmm2
+; SSE42-NEXT:    movmskps %xmm2, %eax
+; SSE42-NEXT:    cmpb $15, %al
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: bool_reduction_v4i64:
 ; AVX1:       # %bb.0:
@@ -1275,18 +1389,37 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
 }
 
 define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
-; SSE-LABEL: bool_reduction_v8i32:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pminud %xmm1, %xmm3
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
-; SSE-NEXT:    pminud %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm2
-; SSE-NEXT:    packssdw %xmm3, %xmm2
-; SSE-NEXT:    packsswb %xmm2, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
-; SSE-NEXT:    cmpb $-1, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v8i32:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT:    pxor %xmm3, %xmm1
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm3, %xmm0
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    packsswb %xmm0, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpb $-1, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v8i32:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pminud %xmm1, %xmm3
+; SSE42-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE42-NEXT:    pminud %xmm0, %xmm2
+; SSE42-NEXT:    pcmpeqd %xmm0, %xmm2
+; SSE42-NEXT:    packssdw %xmm3, %xmm2
+; SSE42-NEXT:    packsswb %xmm2, %xmm2
+; SSE42-NEXT:    pmovmskb %xmm2, %eax
+; SSE42-NEXT:    cmpb $-1, %al
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: bool_reduction_v8i32:
 ; AVX1:       # %bb.0:
@@ -1331,14 +1464,24 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 }
 
 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
-; SSE-LABEL: bool_reduction_v16i16:
-; SSE:       # %bb.0:
-; SSE-NEXT:    psubb %xmm3, %xmm1
-; SSE-NEXT:    psubb %xmm2, %xmm0
-; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    ptest %xmm0, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v16i16:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v16i16:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    psubb %xmm3, %xmm1
+; SSE42-NEXT:    psubb %xmm2, %xmm0
+; SSE42-NEXT:    por %xmm1, %xmm0
+; SSE42-NEXT:    ptest %xmm0, %xmm0
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: bool_reduction_v16i16:
 ; AVX1:       # %bb.0:
@@ -1381,14 +1524,24 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 }
 
 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
-; SSE-LABEL: bool_reduction_v32i8:
-; SSE:       # %bb.0:
-; SSE-NEXT:    psubb %xmm3, %xmm1
-; SSE-NEXT:    psubb %xmm2, %xmm0
-; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    ptest %xmm0, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v32i8:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v32i8:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    psubb %xmm3, %xmm1
+; SSE42-NEXT:    psubb %xmm2, %xmm0
+; SSE42-NEXT:    por %xmm1, %xmm0
+; SSE42-NEXT:    ptest %xmm0, %xmm0
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: bool_reduction_v32i8:
 ; AVX1:       # %bb.0:

diff  --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index aee0fa869be86..9e68f4c62298a 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2   | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx    | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512
 
 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_v2f64_sext:
@@ -254,14 +255,31 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
 }
 
 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
-; SSE-LABEL: test_v2i64_sext:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE-NEXT:    movmskpd %xmm0, %ecx
-; SSE-NEXT:    xorl %eax, %eax
-; SSE-NEXT:    negl %ecx
-; SSE-NEXT:    sbbq %rax, %rax
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_v2i64_sext:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pand %xmm2, %xmm1
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %ecx
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    negl %ecx
+; SSE2-NEXT:    sbbq %rax, %rax
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: test_v2i64_sext:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT:    movmskpd %xmm0, %ecx
+; SSE42-NEXT:    xorl %eax, %eax
+; SSE42-NEXT:    negl %ecx
+; SSE42-NEXT:    sbbq %rax, %rax
+; SSE42-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i64_sext:
 ; AVX:       # %bb.0:
@@ -289,16 +307,42 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
 }
 
 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
-; SSE-LABEL: test_v4i64_sext:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
-; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
-; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    movmskpd %xmm0, %ecx
-; SSE-NEXT:    xorl %eax, %eax
-; SSE-NEXT:    negl %ecx
-; SSE-NEXT:    sbbq %rax, %rax
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_v4i64_sext:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE2-NEXT:    pand %xmm5, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    por %xmm0, %xmm2
+; SSE2-NEXT:    por %xmm3, %xmm2
+; SSE2-NEXT:    movmskpd %xmm2, %ecx
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    negl %ecx
+; SSE2-NEXT:    sbbq %rax, %rax
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: test_v4i64_sext:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
+; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT:    por %xmm1, %xmm0
+; SSE42-NEXT:    movmskpd %xmm0, %ecx
+; SSE42-NEXT:    xorl %eax, %eax
+; SSE42-NEXT:    negl %ecx
+; SSE42-NEXT:    sbbq %rax, %rax
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v4i64_sext:
 ; AVX1:       # %bb.0:
@@ -344,16 +388,42 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
 }
 
 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
-; SSE-LABEL: test_v4i64_legal_sext:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
-; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
-; SSE-NEXT:    packssdw %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %ecx
-; SSE-NEXT:    xorl %eax, %eax
-; SSE-NEXT:    negl %ecx
-; SSE-NEXT:    sbbq %rax, %rax
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_v4i64_legal_sext:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE2-NEXT:    pand %xmm5, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    por %xmm0, %xmm2
+; SSE2-NEXT:    packssdw %xmm3, %xmm2
+; SSE2-NEXT:    movmskps %xmm2, %ecx
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    negl %ecx
+; SSE2-NEXT:    sbbq %rax, %rax
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: test_v4i64_legal_sext:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
+; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT:    packssdw %xmm1, %xmm0
+; SSE42-NEXT:    movmskps %xmm0, %ecx
+; SSE42-NEXT:    xorl %eax, %eax
+; SSE42-NEXT:    negl %ecx
+; SSE42-NEXT:    sbbq %rax, %rax
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v4i64_legal_sext:
 ; AVX1:       # %bb.0:
@@ -978,16 +1048,32 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
 }
 
 define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
-; SSE-LABEL: bool_reduction_v2i64:
-; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; SSE-NEXT:    pxor %xmm2, %xmm1
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE-NEXT:    movmskpd %xmm0, %eax
-; SSE-NEXT:    testl %eax, %eax
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v2i64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pand %xmm2, %xmm1
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v2i64:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT:    pxor %xmm2, %xmm1
+; SSE42-NEXT:    pxor %xmm2, %xmm0
+; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT:    movmskpd %xmm0, %eax
+; SSE42-NEXT:    testl %eax, %eax
+; SSE42-NEXT:    setne %al
+; SSE42-NEXT:    retq
 ;
 ; AVX-LABEL: bool_reduction_v2i64:
 ; AVX:       # %bb.0:
@@ -1015,12 +1101,20 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
 }
 
 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
-; SSE-LABEL: bool_reduction_v4i32:
-; SSE:       # %bb.0:
-; SSE-NEXT:    psubd %xmm1, %xmm0
-; SSE-NEXT:    ptest %xmm0, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v4i32:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    cmpl $15, %eax
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v4i32:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    psubd %xmm1, %xmm0
+; SSE42-NEXT:    ptest %xmm0, %xmm0
+; SSE42-NEXT:    setne %al
+; SSE42-NEXT:    retq
 ;
 ; AVX-LABEL: bool_reduction_v4i32:
 ; AVX:       # %bb.0:
@@ -1117,15 +1211,40 @@ define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
 }
 
 define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
-; SSE-LABEL: bool_reduction_v4i64:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpgtq %xmm1, %xmm3
-; SSE-NEXT:    pcmpgtq %xmm0, %xmm2
-; SSE-NEXT:    packssdw %xmm3, %xmm2
-; SSE-NEXT:    movmskps %xmm2, %eax
-; SSE-NEXT:    testl %eax, %eax
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v4i64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2]
+; SSE2-NEXT:    pand %xmm5, %xmm1
+; SSE2-NEXT:    por %xmm3, %xmm1
+; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
+; SSE2-NEXT:    pand %xmm3, %xmm0
+; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v4i64:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pcmpgtq %xmm1, %xmm3
+; SSE42-NEXT:    pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT:    packssdw %xmm3, %xmm2
+; SSE42-NEXT:    movmskps %xmm2, %eax
+; SSE42-NEXT:    testl %eax, %eax
+; SSE42-NEXT:    setne %al
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: bool_reduction_v4i64:
 ; AVX1:       # %bb.0:
@@ -1167,17 +1286,35 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
 }
 
 define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
-; SSE-LABEL: bool_reduction_v8i32:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pminud %xmm1, %xmm3
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
-; SSE-NEXT:    pminud %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm2
-; SSE-NEXT:    packssdw %xmm3, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
-; SSE-NEXT:    testl %eax, %eax
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: bool_reduction_v8i32:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT:    pxor %xmm3, %xmm1
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm3, %xmm0
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: bool_reduction_v8i32:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pminud %xmm1, %xmm3
+; SSE42-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE42-NEXT:    pminud %xmm0, %xmm2
+; SSE42-NEXT:    pcmpeqd %xmm0, %xmm2
+; SSE42-NEXT:    packssdw %xmm3, %xmm2
+; SSE42-NEXT:    pmovmskb %xmm2, %eax
+; SSE42-NEXT:    testl %eax, %eax
+; SSE42-NEXT:    setne %al
+; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: bool_reduction_v8i32:
 ; AVX1:       # %bb.0:


        


More information about the llvm-commits mailing list