[llvm] e6bc72d - [X86] Add MOVMSK bit extraction test coverage for #66191

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 07:28:23 PDT 2024


Author: Simon Pilgrim
Date: 2024-07-01T15:28:07+01:00
New Revision: e6bc72dfe885e8ef4eae1c7da87dd13e40cfed6d

URL: https://github.com/llvm/llvm-project/commit/e6bc72dfe885e8ef4eae1c7da87dd13e40cfed6d
DIFF: https://github.com/llvm/llvm-project/commit/e6bc72dfe885e8ef4eae1c7da87dd13e40cfed6d.diff

LOG: [X86] Add MOVMSK bit extraction test coverage for #66191

These cover codegen regressions caused by the middle-end canonicalizing ICMP_EQ(AND(X,MSB),0) -> ICMP_SGT(X,-1) and similar patterns.
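
As a rough illustration (not part of the commit), the canonicalization in question is the middle-end (e.g. InstCombine) rewriting an explicit test of the mask's most-significant bit into a signed comparison, so the backend sees the ICMP_SGT form rather than the AND(X,MSB) pattern. Using an i8 mask like the one produced by bitcasting an <8 x i1> compare in the tests below:

    ; before: test the sign bit of the 8-bit mask directly
    %bit  = and i8 %msk, -128       ; -128 = 0x80, the MSB of the mask
    %icmp = icmp eq i8 %bit, 0

    ; after canonicalization: the same predicate as a signed compare
    %icmp = icmp sgt i8 %msk, -1

The movmsk_sgt_* / movmsk_slt_* tests added here exercise exactly these canonicalized forms.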

Added: 
    llvm/test/CodeGen/X86/movmsk-bittest.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/movmsk-bittest.ll b/llvm/test/CodeGen/X86/movmsk-bittest.ll
new file mode 100644
index 0000000000000..7c8fe03ff4741
--- /dev/null
+++ b/llvm/test/CodeGen/X86/movmsk-bittest.ll
@@ -0,0 +1,583 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
+
+
+; PR66191 - movmsk msb bit extraction
+
+define i32 @movmsk_eq_v2i64_0(<2 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v2i64_0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movmskpd %xmm0, %ecx
+; SSE-NEXT:    testb $1, %cl
+; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_eq_v2i64_0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX-NEXT:    testb $1, %cl
+; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <2 x i64> %v, zeroinitializer
+  %msk = bitcast <2 x i1> %cmp to i2
+  %bit = and i2 %msk, 1
+  %icmp = icmp eq i2 %bit, 0
+  %cond = select i1 %icmp, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @movmsk_slt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v2i64_1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movmskpd %xmm0, %ecx
+; SSE-NEXT:    shlb $6, %cl
+; SSE-NEXT:    sarb $6, %cl
+; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_slt_v2i64_1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX-NEXT:    shlb $6, %cl
+; AVX-NEXT:    sarb $6, %cl
+; AVX-NEXT:    cmovnsl %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <2 x i64> %v, zeroinitializer
+  %msk = bitcast <2 x i1> %cmp to i2
+  %icmp = icmp slt i2 %msk, 0
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v2i64_1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movmskpd %xmm0, %ecx
+; SSE-NEXT:    shlb $6, %cl
+; SSE-NEXT:    sarb $6, %cl
+; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_sgt_v2i64_1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX-NEXT:    shlb $6, %cl
+; AVX-NEXT:    sarb $6, %cl
+; AVX-NEXT:    cmovsl %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <2 x i64> %v, zeroinitializer
+  %msk = bitcast <2 x i1> %cmp to i2
+  %icmp = icmp sgt i2 %msk, -1
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_eq_v4i32_0(<4 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v4i32_0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    testb $1, %cl
+; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_eq_v4i32_0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskps %xmm0, %ecx
+; AVX-NEXT:    testb $1, %cl
+; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <4 x i32> %v, zeroinitializer
+  %msk = bitcast <4 x i1> %cmp to i4
+  %bit = and i4 %msk, 1
+  %icmp = icmp eq i4 %bit, 0
+  %cond = select i1 %icmp, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @movmsk_slt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v4i32_3:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    shlb $4, %cl
+; SSE-NEXT:    sarb $4, %cl
+; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_slt_v4i32_3:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskps %xmm0, %ecx
+; AVX-NEXT:    shlb $4, %cl
+; AVX-NEXT:    sarb $4, %cl
+; AVX-NEXT:    cmovnsl %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <4 x i32> %v, zeroinitializer
+  %msk = bitcast <4 x i1> %cmp to i4
+  %icmp = icmp slt i4 %msk, 0
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v4i32_3:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    shlb $4, %cl
+; SSE-NEXT:    sarb $4, %cl
+; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_sgt_v4i32_3:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskps %xmm0, %ecx
+; AVX-NEXT:    shlb $4, %cl
+; AVX-NEXT:    sarb $4, %cl
+; AVX-NEXT:    cmovsl %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <4 x i32> %v, zeroinitializer
+  %msk = bitcast <4 x i1> %cmp to i4
+  %icmp = icmp sgt i4 %msk, -1
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_eq_v16i8_0(<16 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v16i8_0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testb $1, %cl
+; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_eq_v16i8_0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    testb $1, %cl
+; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <16 x i8> %v, zeroinitializer
+  %msk = bitcast <16 x i1> %cmp to i16
+  %bit = and i16 %msk, 1
+  %icmp = icmp eq i16 %bit, 0
+  %cond = select i1 %icmp, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @movmsk_slt_v16i8_15(<16 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v16i8_15:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testw %cx, %cx
+; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_slt_v16i8_15:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    testw %cx, %cx
+; AVX-NEXT:    cmovnsl %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <16 x i8> %v, zeroinitializer
+  %msk = bitcast <16 x i1> %cmp to i16
+  %icmp = icmp slt i16 %msk, 0
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v16i8_15(<16 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v16i8_15:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testw %cx, %cx
+; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_sgt_v16i8_15:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    testw %cx, %cx
+; AVX-NEXT:    cmovsl %esi, %eax
+; AVX-NEXT:    retq
+  %cmp = icmp slt <16 x i8> %v, zeroinitializer
+  %msk = bitcast <16 x i1> %cmp to i16
+  %icmp = icmp sgt i16 %msk, -1
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_eq_v4i64_0(<4 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v4i64_0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    testb $1, %cl
+; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_eq_v4i64_0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX-NEXT:    testb $1, %cl
+; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = icmp slt <4 x i64> %v, zeroinitializer
+  %msk = bitcast <4 x i1> %cmp to i4
+  %bit = and i4 %msk, 1
+  %icmp = icmp eq i4 %bit, 0
+  %cond = select i1 %icmp, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @movmsk_slt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v4i64_3:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    shlb $4, %cl
+; SSE-NEXT:    sarb $4, %cl
+; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_slt_v4i64_3:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskpd %ymm0, %ecx
+; AVX-NEXT:    shlb $4, %cl
+; AVX-NEXT:    sarb $4, %cl
+; AVX-NEXT:    cmovnsl %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = icmp slt <4 x i64> %v, zeroinitializer
+  %msk = bitcast <4 x i1> %cmp to i4
+  %icmp = icmp slt i4 %msk, 0
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v4i64_3:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    shlb $4, %cl
+; SSE-NEXT:    sarb $4, %cl
+; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_sgt_v4i64_3:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskpd %ymm0, %ecx
+; AVX-NEXT:    shlb $4, %cl
+; AVX-NEXT:    sarb $4, %cl
+; AVX-NEXT:    cmovsl %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = icmp slt <4 x i64> %v, zeroinitializer
+  %msk = bitcast <4 x i1> %cmp to i4
+  %icmp = icmp sgt i4 %msk, -1
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_eq_v8i32_0(<8 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v8i32_0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    packssdw %xmm0, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testb $1, %cl
+; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_eq_v8i32_0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskps %xmm0, %ecx
+; AVX-NEXT:    testb $1, %cl
+; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = icmp slt <8 x i32> %v, zeroinitializer
+  %msk = bitcast <8 x i1> %cmp to i8
+  %bit = and i8 %msk, 1
+  %icmp = icmp eq i8 %bit, 0
+  %cond = select i1 %icmp, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @movmsk_eq_v8i32_3(<8 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v8i32_3:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    packssdw %xmm0, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testb $8, %cl
+; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_eq_v8i32_3:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskps %xmm0, %ecx
+; AVX-NEXT:    testb $8, %cl
+; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = icmp slt <8 x i32> %v, zeroinitializer
+  %msk = bitcast <8 x i1> %cmp to i8
+  %bit = and i8 %msk, 8
+  %icmp = icmp eq i8 %bit, 0
+  %cond = select i1 %icmp, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @movmsk_slt_v8i32_7(<8 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v8i32_7:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testb %cl, %cl
+; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_slt_v8i32_7:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskps %ymm0, %ecx
+; AVX-NEXT:    testb %cl, %cl
+; AVX-NEXT:    cmovnsl %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = icmp slt <8 x i32> %v, zeroinitializer
+  %msk = bitcast <8 x i1> %cmp to i8
+  %icmp = icmp slt i8 %msk, 0
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v8i32_7(<8 x i32> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v8i32_7:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testb %cl, %cl
+; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_sgt_v8i32_7:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vmovmskps %ymm0, %ecx
+; AVX-NEXT:    testb %cl, %cl
+; AVX-NEXT:    cmovsl %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = icmp slt <8 x i32> %v, zeroinitializer
+  %msk = bitcast <8 x i1> %cmp to i8
+  %icmp = icmp sgt i8 %msk, -1
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_eq_v32i8_0(<32 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v32i8_0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    testb $1, %cl
+; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: movmsk_eq_v32i8_0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    testb $1, %cl
+; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %v, zeroinitializer
+  %msk = bitcast <32 x i1> %cmp to i32
+  %bit = and i32 %msk, 1
+  %icmp = icmp eq i32 %bit, 0
+  %cond = select i1 %icmp, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @movmsk_eq_v32i8_30(<32 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_eq_v32i8_30:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pmovmskb %xmm1, %ecx
+; SSE-NEXT:    shll $16, %ecx
+; SSE-NEXT:    testl $1073741824, %ecx # imm = 0x40000000
+; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: movmsk_eq_v32i8_30:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    movl %edi, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    shll $16, %ecx
+; AVX1-NEXT:    testl $1073741824, %ecx # imm = 0x40000000
+; AVX1-NEXT:    cmovel %esi, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: movmsk_eq_v32i8_30:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    movl %edi, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX2-NEXT:    testl $1073741824, %ecx # imm = 0x40000000
+; AVX2-NEXT:    cmovel %esi, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: movmsk_eq_v32i8_30:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movl %edi, %eax
+; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX512-NEXT:    testl $1073741824, %ecx # imm = 0x40000000
+; AVX512-NEXT:    cmovel %esi, %eax
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %v, zeroinitializer
+  %msk = bitcast <32 x i1> %cmp to i32
+  %bit = and i32 %msk, 1073741824
+  %icmp = icmp eq i32 %bit, 0
+  %cond = select i1 %icmp, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @movmsk_slt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_slt_v32i8_31:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    pmovmskb %xmm1, %edx
+; SSE-NEXT:    shll $16, %edx
+; SSE-NEXT:    orl %ecx, %edx
+; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: movmsk_slt_v32i8_31:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    movl %edi, %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %edx
+; AVX1-NEXT:    shll $16, %edx
+; AVX1-NEXT:    orl %ecx, %edx
+; AVX1-NEXT:    cmovnsl %esi, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: movmsk_slt_v32i8_31:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    movl %edi, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX2-NEXT:    testl %ecx, %ecx
+; AVX2-NEXT:    cmovnsl %esi, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: movmsk_slt_v32i8_31:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movl %edi, %eax
+; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX512-NEXT:    testl %ecx, %ecx
+; AVX512-NEXT:    cmovnsl %esi, %eax
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %v, zeroinitializer
+  %msk = bitcast <32 x i1> %cmp to i32
+  %icmp = icmp slt i32 %msk, 0
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i32 @movmsk_sgt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
+; SSE-LABEL: movmsk_sgt_v32i8_31:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    pmovmskb %xmm1, %edx
+; SSE-NEXT:    shll $16, %edx
+; SSE-NEXT:    orl %ecx, %edx
+; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: movmsk_sgt_v32i8_31:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    movl %edi, %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %edx
+; AVX1-NEXT:    shll $16, %edx
+; AVX1-NEXT:    orl %ecx, %edx
+; AVX1-NEXT:    cmovsl %esi, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: movmsk_sgt_v32i8_31:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    movl %edi, %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX2-NEXT:    testl %ecx, %ecx
+; AVX2-NEXT:    cmovsl %esi, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: movmsk_sgt_v32i8_31:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movl %edi, %eax
+; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX512-NEXT:    testl %ecx, %ecx
+; AVX512-NEXT:    cmovsl %esi, %eax
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %v, zeroinitializer
+  %msk = bitcast <32 x i1> %cmp to i32
+  %icmp = icmp sgt i32 %msk, -1
+  %cond = select i1 %icmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX1OR2: {{.*}}
+; SSE2: {{.*}}
+; SSE41: {{.*}}
