[llvm] 7ea3440 - [X86] Only fold PTEST->TESTP on AVX targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 13 11:40:05 PDT 2023


Author: Simon Pilgrim
Date: 2023-04-13T19:39:47+01:00
New Revision: 7ea34403f3945506b25847f893ac28c70be172a0

URL: https://github.com/llvm/llvm-project/commit/7ea34403f3945506b25847f893ac28c70be172a0
DIFF: https://github.com/llvm/llvm-project/commit/7ea34403f3945506b25847f893ac28c70be172a0.diff

LOG: [X86] Only fold PTEST->TESTP on AVX targets

While PTEST is an SSE41 instruction, TESTPS/TESTPD were only added with AVX
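
This matters for the sign-bit path in combinePTESTCC: without a
hasAVX() check, the 32/64-bit element cases could form the AVX-only
TESTPS/TESTPD nodes even when compiling for SSE4.1-only targets. The
new ptestz_v4i32_signbits test below covers exactly this case; for
local experimentation, a standalone copy of that test (runnable with
llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1):

  define i32 @ptestz_v4i32_signbits(<4 x i32> %c, i32 %a, i32 %b) {
    %t1 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
    %t2 = bitcast <4 x i32> %t1 to <2 x i64>
    %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> <i64 -1, i64 -1>)
    %t4 = icmp ne i32 %t3, 0
    %t5 = select i1 %t4, i32 %a, i32 %b
    ret i32 %t5
  }
  declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone

With the guard in place this now selects movmskps + testl instead of
the AVX-only vtestps.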

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/combine-ptest.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 15496d8a2fe3..9036c8b43b37 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47398,7 +47398,7 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
            // For vXi16 cases we need to use pmovmskb and extract every other
             // sign bit.
             SDLoc DL(EFLAGS);
-            if (EltBits == 32 || EltBits == 64) {
+            if ((EltBits == 32 || EltBits == 64) && Subtarget.hasAVX()) {
               MVT FloatSVT = MVT::getFloatingPointVT(EltBits);
               MVT FloatVT =
                   MVT::getVectorVT(FloatSVT, OpVT.getSizeInBits() / EltBits);

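(On pre-AVX targets the combine now either leaves the PTEST in place
or, where the sign-bit MOVMSK lowering applies, selects
movmskps/movmskpd plus a scalar test, as the regenerated checks below
show; vXi16 continues to use pmovmskb on all targets.)
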
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index 0677bc268979..7d11745e0503 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -1,19 +1,27 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; TODO: ADD SSE41 test coverage
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx    | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=AVX
 
 ;
 ; testz(~X,Y) -> testc(X,Y)
 ;
 
 define i32 @ptestz_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_invert0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovael %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestz_128_invert0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    ptest %xmm1, %xmm0
+; SSE-NEXT:    cmovael %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_128_invert0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm1, %xmm0
+; AVX-NEXT:    cmovael %esi, %eax
+; AVX-NEXT:    retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %d)
   %t3 = icmp ne i32 %t2, 0
@@ -26,12 +34,19 @@ define i32 @ptestz_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestz_128_invert1(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_invert1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %xmm0, %xmm1
-; CHECK-NEXT:    cmovael %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestz_128_invert1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    ptest %xmm0, %xmm1
+; SSE-NEXT:    cmovael %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_128_invert1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm0, %xmm1
+; AVX-NEXT:    cmovael %esi, %eax
+; AVX-NEXT:    retq
   %t1 = xor <2 x i64> %d, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %t1)
   %t3 = icmp ne i32 %t2, 0
@@ -44,12 +59,19 @@ define i32 @ptestz_128_invert1(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestc_128_invert0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestc_128_invert0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    ptest %xmm1, %xmm0
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestc_128_invert0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm1, %xmm0
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
   %t3 = icmp ne i32 %t2, 0
@@ -62,12 +84,19 @@ define i32 @ptestc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestnzc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestnzc_128_invert0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestnzc_128_invert0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    ptest %xmm1, %xmm0
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestnzc_128_invert0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm1, %xmm0
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
   %t3 = icmp ne i32 %t2, 0
@@ -80,13 +109,21 @@ define i32 @ptestnzc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestc_128_not(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestc_128_not:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovael %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestc_128_not:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    ptest %xmm1, %xmm0
+; SSE-NEXT:    cmovael %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestc_128_not:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vptest %xmm1, %xmm0
+; AVX-NEXT:    cmovael %esi, %eax
+; AVX-NEXT:    retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %c, <2 x i64> %t1)
   %t3 = icmp ne i32 %t2, 0
@@ -99,12 +136,19 @@ define i32 @ptestc_128_not(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestz_128_and(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_and:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestz_128_and:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    ptest %xmm1, %xmm0
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_128_and:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm1, %xmm0
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    retq
   %t1 = and <2 x i64> %c, %d
   %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t1)
   %t3 = icmp ne i32 %t2, 0
@@ -117,12 +161,19 @@ define i32 @ptestz_128_and(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestz_128_andc(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_andc:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovael %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestz_128_andc:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    ptest %xmm1, %xmm0
+; SSE-NEXT:    cmovael %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_128_andc:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm1, %xmm0
+; AVX-NEXT:    cmovael %esi, %eax
+; AVX-NEXT:    retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = and <2 x i64> %t1, %d
   %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> %t2)
@@ -136,12 +187,19 @@ define i32 @ptestz_128_andc(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestz_128_allones0(<2 x i64> %c, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_allones0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %xmm0, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestz_128_allones0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    ptest %xmm0, %xmm0
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_128_allones0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    retq
   %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> <i64 -1, i64 -1>, <2 x i64> %c)
   %t2 = icmp ne i32 %t1, 0
   %t3 = select i1 %t2, i32 %a, i32 %b
@@ -153,12 +211,19 @@ define i32 @ptestz_128_allones0(<2 x i64> %c, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestz_128_allones1(<2 x i64> %c, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_allones1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %xmm0, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestz_128_allones1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    ptest %xmm0, %xmm0
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_128_allones1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    retq
   %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> <i64 -1, i64 -1>)
   %t2 = icmp ne i32 %t1, 0
   %t3 = select i1 %t2, i32 %a, i32 %b
@@ -166,13 +231,21 @@ define i32 @ptestz_128_allones1(<2 x i64> %c, i32 %a, i32 %b) {
 }
 
 define zeroext i1 @PR38522(ptr %x, ptr %y) {
-; CHECK-LABEL: PR38522:
-; CHECK:       # %bb.0: # %start
-; CHECK-NEXT:    vmovdqa (%rdi), %xmm0
-; CHECK-NEXT:    vpcmpgtb (%rsi), %xmm0, %xmm0
-; CHECK-NEXT:    vptest %xmm0, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    retq
+; SSE-LABEL: PR38522:
+; SSE:       # %bb.0: # %start
+; SSE-NEXT:    movdqa (%rdi), %xmm0
+; SSE-NEXT:    pcmpgtb (%rsi), %xmm0
+; SSE-NEXT:    ptest %xmm0, %xmm0
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: PR38522:
+; AVX:       # %bb.0: # %start
+; AVX-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX-NEXT:    vpcmpgtb (%rsi), %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
 start:
   %0 = load <16 x i8>, ptr %x, align 16
   %1 = load <16 x i8>, ptr %y, align 16
@@ -185,16 +258,33 @@ start:
 }
 
 ;
-; testz(ashr(X,bw-1),-1) -> testpd/testps/pmovmskb(X)
+; testz(ashr(X,bw-1),-1) -> testpd/testps/movmskpd/movmskps/pmovmskb(X)
 ;
 
 define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_v2i64_signbits:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vtestpd %xmm0, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
-; CHECK-NEXT:    retq
+; SSE41-LABEL: ptestz_v2i64_signbits:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movl %edi, %eax
+; SSE41-NEXT:    psrad $31, %xmm0
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    cmovnel %esi, %eax
+; SSE41-NEXT:    retq
+;
+; SSE42-LABEL: ptestz_v2i64_signbits:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    movl %edi, %eax
+; SSE42-NEXT:    movmskpd %xmm0, %ecx
+; SSE42-NEXT:    testl %ecx, %ecx
+; SSE42-NEXT:    cmovnel %esi, %eax
+; SSE42-NEXT:    retq
+;
+; AVX-LABEL: ptestz_v2i64_signbits:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vtestpd %xmm0, %xmm0
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    retq
   %t1 = ashr <2 x i64> %c, <i64 63, i64 63>
   %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> <i64 -1, i64 -1>)
   %t3 = icmp ne i32 %t2, 0
@@ -202,14 +292,45 @@ define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) {
   ret i32 %t4
 }
 
+define i32 @ptestz_v4i32_signbits(<4 x i32> %c, i32 %a, i32 %b) {
+; SSE-LABEL: ptestz_v4i32_signbits:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    testl %ecx, %ecx
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_v4i32_signbits:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vtestps %xmm0, %xmm0
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    retq
+  %t1 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
+  %t2 = bitcast <4 x i32> %t1 to <2 x i64>
+  %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> <i64 -1, i64 -1>)
+  %t4 = icmp ne i32 %t3, 0
+  %t5 = select i1 %t4, i32 %a, i32 %b
+  ret i32 %t5
+}
+
 define i32 @ptestz_v8i16_signbits(<8 x i16> %c, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_v8i16_signbits:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpmovmskb %xmm0, %ecx
-; CHECK-NEXT:    testl $43690, %ecx # imm = 0xAAAA
-; CHECK-NEXT:    cmovnel %esi, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestz_v8i16_signbits:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testl $43690, %ecx # imm = 0xAAAA
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_v8i16_signbits:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    testl $43690, %ecx # imm = 0xAAAA
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    retq
   %t1 = ashr <8 x i16> %c, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
   %t2 = bitcast <8 x i16> %t1 to <2 x i64>
   %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> <i64 -1, i64 -1>)
@@ -223,13 +344,22 @@ define i32 @ptestz_v8i16_signbits(<8 x i16> %c, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_v2i64_concat:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vptest %ymm1, %ymm0
-; CHECK-NEXT:    cmovnel %esi, %eax
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; SSE-LABEL: ptestz_v2i64_concat:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    ptest %xmm2, %xmm0
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptestz_v2i64_concat:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %ymm1, %ymm0
+; AVX-NEXT:    cmovnel %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
   %t1 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
   %t2 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
   %t3 = shufflevector <4 x i64> %d, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -244,13 +374,21 @@ define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
 
 ; FIXME: Foldable to ptest(xor(%0,%1),xor(%0,%1))
 define i1 @PR38788(<4 x i32> %0, <4 x i32> %1) {
-; CHECK-LABEL: PR38788:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    retq
+; SSE-LABEL: PR38788:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    ptest %xmm1, %xmm0
+; SSE-NEXT:    setb %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: PR38788:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vptest %xmm1, %xmm0
+; AVX-NEXT:    setb %al
+; AVX-NEXT:    retq
   %3 = icmp eq <4 x i32> %0, %1
   %4 = sext <4 x i1> %3 to <4 x i32>
   %5 = bitcast <4 x i32> %4 to <2 x i64>
@@ -262,5 +400,3 @@ define i1 @PR38788(<4 x i32> %0, <4 x i32> %1) {
 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}