[llvm] 835a737 - [X86] Add test showing incorrect movmsk->ptest fold
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 28 05:22:46 PST 2022
Author: Simon Pilgrim
Date: 2022-01-28T13:22:36Z
New Revision: 835a737887cf565b3dba7bb73d405994009a478d
URL: https://github.com/llvm/llvm-project/commit/835a737887cf565b3dba7bb73d405994009a478d
DIFF: https://github.com/llvm/llvm-project/commit/835a737887cf565b3dba7bb73d405994009a478d.diff
LOG: [X86] Add test showing incorrect movmsk->ptest fold
We can't fold MOVMSK(BITCAST(PCMPEQ(X,0))) -> PTESTZ(X,X) if we're not testing every element comparison, e.g. when MOVMSKPS reads only 4 sign bits out of the 16 byte results of a PCMPEQB.
Added:
Modified:
llvm/test/CodeGen/X86/combine-movmsk.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/combine-movmsk.ll b/llvm/test/CodeGen/X86/combine-movmsk.ll
index 892475d07ade..1ef0ea1199f5 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk.ll
@@ -306,3 +306,40 @@ define i32 @or_pmovmskb_pmovmskb(<16 x i8> %a0, <8 x i16> %a1) {
%7 = or i32 %3, %6
ret i32 %7
}
+
+; TODO: We can't fold to ptest if we're not checking every pcmpeq result
+define i32 @movmskps_ptest_numelts_mismatch(<16 x i8> %a0) {
+; SSE2-LABEL: movmskps_ptest_numelts_mismatch:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT: movmskps %xmm1, %ecx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: cmpl $15, %ecx
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: movmskps_ptest_numelts_mismatch:
+; SSE42: # %bb.0:
+; SSE42-NEXT: xorl %eax, %eax
+; SSE42-NEXT: ptest %xmm0, %xmm0
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: negl %eax
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: movmskps_ptest_numelts_mismatch:
+; AVX: # %bb.0:
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: negl %eax
+; AVX-NEXT: retq
+ %1 = icmp eq <16 x i8> %a0, zeroinitializer
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ %3 = bitcast <16 x i8> %2 to <4 x float>
+ %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
+ %5 = icmp eq i32 %4, 15
+ %6 = sext i1 %5 to i32
+ ret i32 %6
+}
More information about the llvm-commits mailing list