[llvm] e6bc72d - [X86] Add MOVMSK bit extraction test coverage for #66191
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 07:28:23 PDT 2024
Author: Simon Pilgrim
Date: 2024-07-01T15:28:07+01:00
New Revision: e6bc72dfe885e8ef4eae1c7da87dd13e40cfed6d
URL: https://github.com/llvm/llvm-project/commit/e6bc72dfe885e8ef4eae1c7da87dd13e40cfed6d
DIFF: https://github.com/llvm/llvm-project/commit/e6bc72dfe885e8ef4eae1c7da87dd13e40cfed6d.diff
LOG: [X86] Add MOVMSK bit extraction test coverage for #66191
Covers regressions caused by the middle-end canonicalizing ICMP_EQ(AND(X,MSB),0) -> ICMP_SGT(X,-1) and similar sign-bit patterns.
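
For illustration only (a minimal sketch of the canonicalization the commit message refers to, not part of this commit; value names are hypothetical), testing the MSB of an i4 mask via AND+EQ is rewritten by the middle-end into a signed compare against -1:

    ; before: explicit sign-bit test of the i4 mask
    %bit  = and i4 %msk, 8          ; 8 = MSB of an i4
    %cmp  = icmp eq i4 %bit, 0      ; true iff the sign bit is clear

    ; after (canonical form): same predicate as a signed compare
    %cmp  = icmp sgt i4 %msk, -1    ; true iff %msk is non-negative

The backend then has to recover the cheap TEST-of-a-MOVMSK-bit lowering from the sgt form, which is what the shl/sar sequences in the checks below currently fail to do.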
Added:
llvm/test/CodeGen/X86/movmsk-bittest.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/movmsk-bittest.ll b/llvm/test/CodeGen/X86/movmsk-bittest.ll
new file mode 100644
index 0000000000000..7c8fe03ff4741
--- /dev/null
+++ b/llvm/test/CodeGen/X86/movmsk-bittest.ll
@@ -0,0 +1,583 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
+
+
+; PR66191 - movmsk msb bit extraction
+
+define i32 @movmsk_eq_v2i64_0(<2 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v2i64_0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: movmskpd %xmm0, %ecx
+; SSE-NEXT: testb $1, %cl
+; SSE-NEXT: cmovel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_eq_v2i64_0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: testb $1, %cl
+; AVX-NEXT: cmovel %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <2 x i64> %v, zeroinitializer
+ %msk = bitcast <2 x i1> %cmp to i2
+ %bit = and i2 %msk, 1
+ %icmp = icmp eq i2 %bit, 0
+ %cond = select i1 %icmp, i32 %b, i32 %a
+ ret i32 %cond
+}
+
+define i32 @movmsk_slt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v2i64_1:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: movmskpd %xmm0, %ecx
+; SSE-NEXT: shlb $6, %cl
+; SSE-NEXT: sarb $6, %cl
+; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_slt_v2i64_1:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: shlb $6, %cl
+; AVX-NEXT: sarb $6, %cl
+; AVX-NEXT: cmovnsl %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <2 x i64> %v, zeroinitializer
+ %msk = bitcast <2 x i1> %cmp to i2
+ %icmp = icmp slt i2 %msk, 0
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v2i64_1:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: movmskpd %xmm0, %ecx
+; SSE-NEXT: shlb $6, %cl
+; SSE-NEXT: sarb $6, %cl
+; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_sgt_v2i64_1:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: shlb $6, %cl
+; AVX-NEXT: sarb $6, %cl
+; AVX-NEXT: cmovsl %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <2 x i64> %v, zeroinitializer
+ %msk = bitcast <2 x i1> %cmp to i2
+ %icmp = icmp sgt i2 %msk, -1
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_eq_v4i32_0(<4 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v4i32_0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: testb $1, %cl
+; SSE-NEXT: cmovel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_eq_v4i32_0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: testb $1, %cl
+; AVX-NEXT: cmovel %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <4 x i32> %v, zeroinitializer
+ %msk = bitcast <4 x i1> %cmp to i4
+ %bit = and i4 %msk, 1
+ %icmp = icmp eq i4 %bit, 0
+ %cond = select i1 %icmp, i32 %b, i32 %a
+ ret i32 %cond
+}
+
+define i32 @movmsk_slt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v4i32_3:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: shlb $4, %cl
+; SSE-NEXT: sarb $4, %cl
+; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_slt_v4i32_3:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: shlb $4, %cl
+; AVX-NEXT: sarb $4, %cl
+; AVX-NEXT: cmovnsl %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <4 x i32> %v, zeroinitializer
+ %msk = bitcast <4 x i1> %cmp to i4
+ %icmp = icmp slt i4 %msk, 0
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v4i32_3:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: shlb $4, %cl
+; SSE-NEXT: sarb $4, %cl
+; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_sgt_v4i32_3:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: shlb $4, %cl
+; AVX-NEXT: sarb $4, %cl
+; AVX-NEXT: cmovsl %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <4 x i32> %v, zeroinitializer
+ %msk = bitcast <4 x i1> %cmp to i4
+ %icmp = icmp sgt i4 %msk, -1
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_eq_v16i8_0(<16 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v16i8_0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testb $1, %cl
+; SSE-NEXT: cmovel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_eq_v16i8_0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: testb $1, %cl
+; AVX-NEXT: cmovel %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <16 x i8> %v, zeroinitializer
+ %msk = bitcast <16 x i1> %cmp to i16
+ %bit = and i16 %msk, 1
+ %icmp = icmp eq i16 %bit, 0
+ %cond = select i1 %icmp, i32 %b, i32 %a
+ ret i32 %cond
+}
+
+define i32 @movmsk_slt_v16i8_15(<16 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v16i8_15:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testw %cx, %cx
+; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_slt_v16i8_15:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: testw %cx, %cx
+; AVX-NEXT: cmovnsl %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <16 x i8> %v, zeroinitializer
+ %msk = bitcast <16 x i1> %cmp to i16
+ %icmp = icmp slt i16 %msk, 0
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v16i8_15(<16 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v16i8_15:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testw %cx, %cx
+; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_sgt_v16i8_15:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: testw %cx, %cx
+; AVX-NEXT: cmovsl %esi, %eax
+; AVX-NEXT: retq
+ %cmp = icmp slt <16 x i8> %v, zeroinitializer
+ %msk = bitcast <16 x i1> %cmp to i16
+ %icmp = icmp sgt i16 %msk, -1
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_eq_v4i64_0(<4 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v4i64_0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: testb $1, %cl
+; SSE-NEXT: cmovel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_eq_v4i64_0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: testb $1, %cl
+; AVX-NEXT: cmovel %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cmp = icmp slt <4 x i64> %v, zeroinitializer
+ %msk = bitcast <4 x i1> %cmp to i4
+ %bit = and i4 %msk, 1
+ %icmp = icmp eq i4 %bit, 0
+ %cond = select i1 %icmp, i32 %b, i32 %a
+ ret i32 %cond
+}
+
+define i32 @movmsk_slt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v4i64_3:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: shlb $4, %cl
+; SSE-NEXT: sarb $4, %cl
+; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_slt_v4i64_3:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskpd %ymm0, %ecx
+; AVX-NEXT: shlb $4, %cl
+; AVX-NEXT: sarb $4, %cl
+; AVX-NEXT: cmovnsl %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cmp = icmp slt <4 x i64> %v, zeroinitializer
+ %msk = bitcast <4 x i1> %cmp to i4
+ %icmp = icmp slt i4 %msk, 0
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v4i64_3:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: shlb $4, %cl
+; SSE-NEXT: sarb $4, %cl
+; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_sgt_v4i64_3:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskpd %ymm0, %ecx
+; AVX-NEXT: shlb $4, %cl
+; AVX-NEXT: sarb $4, %cl
+; AVX-NEXT: cmovsl %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cmp = icmp slt <4 x i64> %v, zeroinitializer
+ %msk = bitcast <4 x i1> %cmp to i4
+ %icmp = icmp sgt i4 %msk, -1
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_eq_v8i32_0(<8 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v8i32_0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testb $1, %cl
+; SSE-NEXT: cmovel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_eq_v8i32_0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: testb $1, %cl
+; AVX-NEXT: cmovel %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cmp = icmp slt <8 x i32> %v, zeroinitializer
+ %msk = bitcast <8 x i1> %cmp to i8
+ %bit = and i8 %msk, 1
+ %icmp = icmp eq i8 %bit, 0
+ %cond = select i1 %icmp, i32 %b, i32 %a
+ ret i32 %cond
+}
+
+define i32 @movmsk_eq_v8i32_3(<8 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v8i32_3:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testb $8, %cl
+; SSE-NEXT: cmovel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_eq_v8i32_3:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: testb $8, %cl
+; AVX-NEXT: cmovel %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cmp = icmp slt <8 x i32> %v, zeroinitializer
+ %msk = bitcast <8 x i1> %cmp to i8
+ %bit = and i8 %msk, 8
+ %icmp = icmp eq i8 %bit, 0
+ %cond = select i1 %icmp, i32 %b, i32 %a
+ ret i32 %cond
+}
+
+define i32 @movmsk_slt_v8i32_7(<8 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v8i32_7:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testb %cl, %cl
+; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_slt_v8i32_7:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskps %ymm0, %ecx
+; AVX-NEXT: testb %cl, %cl
+; AVX-NEXT: cmovnsl %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cmp = icmp slt <8 x i32> %v, zeroinitializer
+ %msk = bitcast <8 x i1> %cmp to i8
+ %icmp = icmp slt i8 %msk, 0
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v8i32_7(<8 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v8i32_7:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testb %cl, %cl
+; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_sgt_v8i32_7:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vmovmskps %ymm0, %ecx
+; AVX-NEXT: testb %cl, %cl
+; AVX-NEXT: cmovsl %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cmp = icmp slt <8 x i32> %v, zeroinitializer
+ %msk = bitcast <8 x i1> %cmp to i8
+ %icmp = icmp sgt i8 %msk, -1
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_eq_v32i8_0(<32 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v32i8_0:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: testb $1, %cl
+; SSE-NEXT: cmovel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: movmsk_eq_v32i8_0:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: testb $1, %cl
+; AVX-NEXT: cmovel %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cmp = icmp slt <32 x i8> %v, zeroinitializer
+ %msk = bitcast <32 x i1> %cmp to i32
+ %bit = and i32 %msk, 1
+ %icmp = icmp eq i32 %bit, 0
+ %cond = select i1 %icmp, i32 %b, i32 %a
+ ret i32 %cond
+}
+
+define i32 @movmsk_eq_v32i8_30(<32 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v32i8_30:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pmovmskb %xmm1, %ecx
+; SSE-NEXT: shll $16, %ecx
+; SSE-NEXT: testl $1073741824, %ecx # imm = 0x40000000
+; SSE-NEXT: cmovel %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: movmsk_eq_v32i8_30:
+; AVX1: # %bb.0:
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: shll $16, %ecx
+; AVX1-NEXT: testl $1073741824, %ecx # imm = 0x40000000
+; AVX1-NEXT: cmovel %esi, %eax
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: movmsk_eq_v32i8_30:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: testl $1073741824, %ecx # imm = 0x40000000
+; AVX2-NEXT: cmovel %esi, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: movmsk_eq_v32i8_30:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %edi, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512-NEXT: testl $1073741824, %ecx # imm = 0x40000000
+; AVX512-NEXT: cmovel %esi, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %cmp = icmp slt <32 x i8> %v, zeroinitializer
+ %msk = bitcast <32 x i1> %cmp to i32
+ %bit = and i32 %msk, 1073741824
+ %icmp = icmp eq i32 %bit, 0
+ %cond = select i1 %icmp, i32 %b, i32 %a
+ ret i32 %cond
+}
+
+define i32 @movmsk_slt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v32i8_31:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: pmovmskb %xmm1, %edx
+; SSE-NEXT: shll $16, %edx
+; SSE-NEXT: orl %ecx, %edx
+; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: movmsk_slt_v32i8_31:
+; AVX1: # %bb.0:
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %edx
+; AVX1-NEXT: shll $16, %edx
+; AVX1-NEXT: orl %ecx, %edx
+; AVX1-NEXT: cmovnsl %esi, %eax
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: movmsk_slt_v32i8_31:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: testl %ecx, %ecx
+; AVX2-NEXT: cmovnsl %esi, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: movmsk_slt_v32i8_31:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %edi, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512-NEXT: testl %ecx, %ecx
+; AVX512-NEXT: cmovnsl %esi, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %cmp = icmp slt <32 x i8> %v, zeroinitializer
+ %msk = bitcast <32 x i1> %cmp to i32
+ %icmp = icmp slt i32 %msk, 0
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v32i8_31:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: pmovmskb %xmm1, %edx
+; SSE-NEXT: shll $16, %edx
+; SSE-NEXT: orl %ecx, %edx
+; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: movmsk_sgt_v32i8_31:
+; AVX1: # %bb.0:
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmovmskb %xmm0, %edx
+; AVX1-NEXT: shll $16, %edx
+; AVX1-NEXT: orl %ecx, %edx
+; AVX1-NEXT: cmovsl %esi, %eax
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: movmsk_sgt_v32i8_31:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: testl %ecx, %ecx
+; AVX2-NEXT: cmovsl %esi, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: movmsk_sgt_v32i8_31:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %edi, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512-NEXT: testl %ecx, %ecx
+; AVX512-NEXT: cmovsl %esi, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %cmp = icmp slt <32 x i8> %v, zeroinitializer
+ %msk = bitcast <32 x i1> %cmp to i32
+ %icmp = icmp sgt i32 %msk, -1
+ %cond = select i1 %icmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX1OR2: {{.*}}
+; SSE2: {{.*}}
+; SSE41: {{.*}}