[llvm] 7ea3440 - [X86] Only fold PTEST->TESTP on AVX targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 13 11:40:05 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-13T19:39:47+01:00
New Revision: 7ea34403f3945506b25847f893ac28c70be172a0
URL: https://github.com/llvm/llvm-project/commit/7ea34403f3945506b25847f893ac28c70be172a0
DIFF: https://github.com/llvm/llvm-project/commit/7ea34403f3945506b25847f893ac28c70be172a0.diff
LOG: [X86] Only fold PTEST->TESTP on AVX targets
While PTEST is an SSE41 instruction, TESTPS/TESTPD were only added with AVX
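As an illustration (not part of the original patch), the distinction is visible at the intrinsics level: _mm_testz_si128 lowers to the SSE41 ptest instruction, while _mm_testz_ps/_mm_testz_pd lower to the AVX-only vtestps/vtestpd, which is why the fold now checks Subtarget.hasAVX(). A minimal C sketch, assuming a compiler with both SSE4.1 and AVX enabled (function names here are illustrative, not from the patch):

#include <immintrin.h>

// SSE4.1 PTEST: returns 1 iff every bit of (a AND b) is zero (ZF).
int all_bits_zero(__m128i a, __m128i b) {
  return _mm_testz_si128(a, b); // lowers to ptest
}

// AVX VTESTPS: returns 1 iff every per-element sign bit of (a AND b)
// is zero. There is no SSE4.1 encoding of this instruction, so the
// PTEST->TESTP fold is only valid on AVX targets.
int all_sign_bits_zero(__m128 a, __m128 b) {
  return _mm_testz_ps(a, b); // lowers to vtestps, requires AVX
}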
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-ptest.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 15496d8a2fe3..9036c8b43b37 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47398,7 +47398,7 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
// For vXi16 cases we need to use pmovmksb and extract every other
// sign bit.
SDLoc DL(EFLAGS);
- if (EltBits == 32 || EltBits == 64) {
+ if ((EltBits == 32 || EltBits == 64) && Subtarget.hasAVX()) {
MVT FloatSVT = MVT::getFloatingPointVT(EltBits);
MVT FloatVT =
MVT::getVectorVT(FloatSVT, OpVT.getSizeInBits() / EltBits);
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index 0677bc268979..7d11745e0503 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -1,19 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; TODO: ADD SSE41 test coverage
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
;
; testz(~X,Y) -> testc(X,Y)
;
define i32 @ptestz_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_invert0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestz_128_invert0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: cmovael %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_128_invert0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: cmovael %esi, %eax
+; AVX-NEXT: retq
%t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
%t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %d)
%t3 = icmp ne i32 %t2, 0
@@ -26,12 +34,19 @@ define i32 @ptestz_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
;
define i32 @ptestz_128_invert1(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_invert1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %xmm0, %xmm1
-; CHECK-NEXT: cmovael %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestz_128_invert1:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: ptest %xmm0, %xmm1
+; SSE-NEXT: cmovael %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_128_invert1:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %xmm0, %xmm1
+; AVX-NEXT: cmovael %esi, %eax
+; AVX-NEXT: retq
%t1 = xor <2 x i64> %d, <i64 -1, i64 -1>
%t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %t1)
%t3 = icmp ne i32 %t2, 0
@@ -44,12 +59,19 @@ define i32 @ptestz_128_invert1(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
;
define i32 @ptestc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestc_128_invert0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestc_128_invert0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestc_128_invert0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: retq
%t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
%t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
%t3 = icmp ne i32 %t2, 0
@@ -62,12 +84,19 @@ define i32 @ptestc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
;
define i32 @ptestnzc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestnzc_128_invert0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestnzc_128_invert0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestnzc_128_invert0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: retq
%t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
%t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
%t3 = icmp ne i32 %t2, 0
@@ -80,13 +109,21 @@ define i32 @ptestnzc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
;
define i32 @ptestc_128_not(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestc_128_not:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestc_128_not:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: cmovael %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestc_128_not:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: cmovael %esi, %eax
+; AVX-NEXT: retq
%t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
%t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %c, <2 x i64> %t1)
%t3 = icmp ne i32 %t2, 0
@@ -99,12 +136,19 @@ define i32 @ptestc_128_not(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
;
define i32 @ptestz_128_and(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_and:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestz_128_and:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_128_and:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: retq
%t1 = and <2 x i64> %c, %d
%t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t1)
%t3 = icmp ne i32 %t2, 0
@@ -117,12 +161,19 @@ define i32 @ptestz_128_and(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
;
define i32 @ptestz_128_andc(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_andc:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestz_128_andc:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: cmovael %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_128_andc:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: cmovael %esi, %eax
+; AVX-NEXT: retq
%t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
%t2 = and <2 x i64> %t1, %d
%t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> %t2)
@@ -136,12 +187,19 @@ define i32 @ptestz_128_andc(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
;
define i32 @ptestz_128_allones0(<2 x i64> %c, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_allones0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %xmm0, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestz_128_allones0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: ptest %xmm0, %xmm0
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_128_allones0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: retq
%t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> <i64 -1, i64 -1>, <2 x i64> %c)
%t2 = icmp ne i32 %t1, 0
%t3 = select i1 %t2, i32 %a, i32 %b
@@ -153,12 +211,19 @@ define i32 @ptestz_128_allones0(<2 x i64> %c, i32 %a, i32 %b) {
;
define i32 @ptestz_128_allones1(<2 x i64> %c, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_128_allones1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %xmm0, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestz_128_allones1:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: ptest %xmm0, %xmm0
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_128_allones1:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: retq
%t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> <i64 -1, i64 -1>)
%t2 = icmp ne i32 %t1, 0
%t3 = select i1 %t2, i32 %a, i32 %b
@@ -166,13 +231,21 @@ define i32 @ptestz_128_allones1(<2 x i64> %c, i32 %a, i32 %b) {
}
define zeroext i1 @PR38522(ptr %x, ptr %y) {
-; CHECK-LABEL: PR38522:
-; CHECK: # %bb.0: # %start
-; CHECK-NEXT: vmovdqa (%rdi), %xmm0
-; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
-; CHECK-NEXT: vptest %xmm0, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: retq
+; SSE-LABEL: PR38522:
+; SSE: # %bb.0: # %start
+; SSE-NEXT: movdqa (%rdi), %xmm0
+; SSE-NEXT: pcmpgtb (%rsi), %xmm0
+; SSE-NEXT: ptest %xmm0, %xmm0
+; SSE-NEXT: sete %al
+; SSE-NEXT: retq
+;
+; AVX-LABEL: PR38522:
+; AVX: # %bb.0: # %start
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: retq
start:
%0 = load <16 x i8>, ptr %x, align 16
%1 = load <16 x i8>, ptr %y, align 16
@@ -185,16 +258,33 @@ start:
}
;
-; testz(ashr(X,bw-1),-1) -> testpd/testps/pmovmskb(X)
+; testz(ashr(X,bw-1),-1) -> testpd/testps/movmskpd/movmskps/pmovmskb(X)
;
define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_v2i64_signbits:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vtestpd %xmm0, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: retq
+; SSE41-LABEL: ptestz_v2i64_signbits:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movl %edi, %eax
+; SSE41-NEXT: psrad $31, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: ptest %xmm0, %xmm0
+; SSE41-NEXT: cmovnel %esi, %eax
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: ptestz_v2i64_signbits:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movl %edi, %eax
+; SSE42-NEXT: movmskpd %xmm0, %ecx
+; SSE42-NEXT: testl %ecx, %ecx
+; SSE42-NEXT: cmovnel %esi, %eax
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: ptestz_v2i64_signbits:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vtestpd %xmm0, %xmm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: retq
%t1 = ashr <2 x i64> %c, <i64 63, i64 63>
%t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> <i64 -1, i64 -1>)
%t3 = icmp ne i32 %t2, 0
@@ -202,14 +292,45 @@ define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) {
ret i32 %t4
}
+define i32 @ptestz_v4i32_signbits(<4 x i32> %c, i32 %a, i32 %b) {
+; SSE-LABEL: ptestz_v4i32_signbits:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: testl %ecx, %ecx
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_v4i32_signbits:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vtestps %xmm0, %xmm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: retq
+ %t1 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
+ %t2 = bitcast <4 x i32> %t1 to <2 x i64>
+ %t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> <i64 -1, i64 -1>)
+ %t4 = icmp ne i32 %t3, 0
+ %t5 = select i1 %t4, i32 %a, i32 %b
+ ret i32 %t5
+}
+
define i32 @ptestz_v8i16_signbits(<8 x i16> %c, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_v8i16_signbits:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpmovmskb %xmm0, %ecx
-; CHECK-NEXT: testl $43690, %ecx # imm = 0xAAAA
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestz_v8i16_signbits:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testl $43690, %ecx # imm = 0xAAAA
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_v8i16_signbits:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: testl $43690, %ecx # imm = 0xAAAA
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: retq
%t1 = ashr <8 x i16> %c, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%t2 = bitcast <8 x i16> %t1 to <2 x i64>
%t3 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t2, <2 x i64> <i64 -1, i64 -1>)
@@ -223,13 +344,22 @@ define i32 @ptestz_v8i16_signbits(<8 x i16> %c, i32 %a, i32 %b) {
;
define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: ptestz_v2i64_concat:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vptest %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; SSE-LABEL: ptestz_v2i64_concat:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: por %xmm3, %xmm2
+; SSE-NEXT: ptest %xmm2, %xmm0
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptestz_v2i64_concat:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vptest %ymm1, %ymm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%t1 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
%t2 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
%t3 = shufflevector <4 x i64> %d, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -244,13 +374,21 @@ define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; FIXME: Foldable to ptest(xor(%0,%1),xor(%0,%1))
define i1 @PR38788(<4 x i32> %0, <4 x i32> %1) {
-; CHECK-LABEL: PR38788:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: setb %al
-; CHECK-NEXT: retq
+; SSE-LABEL: PR38788:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: setb %al
+; SSE-NEXT: retq
+;
+; AVX-LABEL: PR38788:
+; AVX: # %bb.0:
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: setb %al
+; AVX-NEXT: retq
%3 = icmp eq <4 x i32> %0, %1
%4 = sext <4 x i1> %3 to <4 x i32>
%5 = bitcast <4 x i32> %4 to <2 x i64>
@@ -262,5 +400,3 @@ define i1 @PR38788(<4 x i32> %0, <4 x i32> %1) {
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
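For reference, the SSE41/SSE42 check lines above were autogenerated by utils/update_llc_test_checks.py (per the NOTE at the top of the test file); they can be regenerated against a locally built llc with, e.g.:

  utils/update_llc_test_checks.py --llc-binary build/bin/llc llvm/test/CodeGen/X86/combine-ptest.ll

(build/bin/llc is an assumed local build path.)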