[llvm] abb9cbb - [X86][SSE] Add tests for non-constant bool vector extractions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 19 06:28:45 PDT 2022


Author: Simon Pilgrim
Date: 2022-03-19T13:25:21Z
New Revision: abb9cbb22e993bb8af1be592f527eae3c48ebcbd

URL: https://github.com/llvm/llvm-project/commit/abb9cbb22e993bb8af1be592f527eae3c48ebcbd
DIFF: https://github.com/llvm/llvm-project/commit/abb9cbb22e993bb8af1be592f527eae3c48ebcbd.diff

LOG: [X86][SSE] Add tests for non-constant bool vector extractions

We should be able to perform this with MOVMSK+TEST/BT instead of spilling to the stack.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/movmsk-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 925f0a9eecd4b..955266a782c40 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4259,6 +4259,305 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
   ret i1 %u1
 }
 
+; Extract elements from a non-constant index.
+
+define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) { ; returns lane %z of the per-byte (%x == %y) mask
+; SSE-LABEL: movmsk_v16i8_var:
+; SSE:       # %bb.0:
+; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
+; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    andl $15, %edi
+; SSE-NEXT:    movb -24(%rsp,%rdi), %al
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: movmsk_v16i8_var:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT:    andl $15, %edi
+; AVX1OR2-NEXT:    movb -24(%rsp,%rdi), %al
+; AVX1OR2-NEXT:    retq
+;
+; KNL-LABEL: movmsk_v16i8_var:
+; KNL:       # %bb.0:
+; KNL-NEXT:    # kill: def $edi killed $edi def $rdi
+; KNL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpmovdb %zmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $15, %edi
+; KNL-NEXT:    movb -24(%rsp,%rdi), %al
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: movmsk_v16i8_var:
+; SKX:       # %bb.0:
+; SKX-NEXT:    # kill: def $edi killed $edi def $rdi
+; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; SKX-NEXT:    vpmovm2b %k0, %xmm0
+; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $15, %edi
+; SKX-NEXT:    movb -24(%rsp,%rdi), %al
+; SKX-NEXT:    retq
+  %cmp = icmp eq <16 x i8> %x, %y ; lane-wise equality, yields a 16-lane i1 mask
+  %val = extractelement <16 x i1> %cmp, i32 %z ; run-time index: the expectations above store the mask to the stack, clamp %z with 'andl $15' and load one byte
+  ret i1 %val
+}
+
+define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) { ; returns lane %z of the per-word signed (%x > %y) mask
+; SSE-LABEL: movmsk_v8i16_var:
+; SSE:       # %bb.0:
+; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
+; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    andl $7, %edi
+; SSE-NEXT:    movb -24(%rsp,%rdi,2), %al
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: movmsk_v8i16_var:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT:    andl $7, %edi
+; AVX1OR2-NEXT:    movb -24(%rsp,%rdi,2), %al
+; AVX1OR2-NEXT:    retq
+;
+; KNL-LABEL: movmsk_v8i16_var:
+; KNL:       # %bb.0:
+; KNL-NEXT:    # kill: def $edi killed $edi def $rdi
+; KNL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpmovdw %zmm0, %ymm0
+; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $7, %edi
+; KNL-NEXT:    movb -24(%rsp,%rdi,2), %al
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: movmsk_v8i16_var:
+; SKX:       # %bb.0:
+; SKX-NEXT:    # kill: def $edi killed $edi def $rdi
+; SKX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
+; SKX-NEXT:    vpmovm2w %k0, %xmm0
+; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $7, %edi
+; SKX-NEXT:    movb -24(%rsp,%rdi,2), %al
+; SKX-NEXT:    retq
+  %cmp = icmp sgt <8 x i16> %x, %y ; lane-wise signed greater-than, yields an 8-lane i1 mask
+  %val = extractelement <8 x i1> %cmp, i32 %z ; run-time index: the expectations above spill the mask, clamp %z with 'andl $7' and load a byte at a 2-byte stride
+  ret i1 %val
+}
+
+define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) { ; returns lane %z of the per-dword signed (%x < %y) mask
+; SSE-LABEL: movmsk_v4i32_var:
+; SSE:       # %bb.0:
+; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    andl $3, %edi
+; SSE-NEXT:    movb -24(%rsp,%rdi,4), %al
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: movmsk_v4i32_var:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT:    andl $3, %edi
+; AVX1OR2-NEXT:    movb -24(%rsp,%rdi,4), %al
+; AVX1OR2-NEXT:    retq
+;
+; KNL-LABEL: movmsk_v4i32_var:
+; KNL:       # %bb.0:
+; KNL-NEXT:    # kill: def $edi killed $edi def $rdi
+; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $3, %edi
+; KNL-NEXT:    movb -24(%rsp,%rdi,4), %al
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: movmsk_v4i32_var:
+; SKX:       # %bb.0:
+; SKX-NEXT:    # kill: def $edi killed $edi def $rdi
+; SKX-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $3, %edi
+; SKX-NEXT:    movb -24(%rsp,%rdi,4), %al
+; SKX-NEXT:    retq
+  %cmp = icmp slt <4 x i32> %x, %y ; signed less-than; the expectations above show it emitted as a greater-than compare with the operands swapped
+  %val = extractelement <4 x i1> %cmp, i32 %z ; run-time index: the expectations above spill the mask, clamp %z with 'andl $3' and load a byte at a 4-byte stride
+  ret i1 %val
+}
+
+define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) { ; returns lane %z of the per-qword (%x != %y) mask
+; SSE2-LABEL: movmsk_v2i64_var:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    # kill: def $edi killed $edi def $rdi
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    andl $1, %edi
+; SSE2-NEXT:    movb -24(%rsp,%rdi,8), %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: movmsk_v2i64_var:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    # kill: def $edi killed $edi def $rdi
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT:    pxor %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT:    andl $1, %edi
+; SSE41-NEXT:    movb -24(%rsp,%rdi,8), %al
+; SSE41-NEXT:    retq
+;
+; AVX1OR2-LABEL: movmsk_v2i64_var:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT:    andl $1, %edi
+; AVX1OR2-NEXT:    movb -24(%rsp,%rdi,8), %al
+; AVX1OR2-NEXT:    retq
+;
+; KNL-LABEL: movmsk_v2i64_var:
+; KNL:       # %bb.0:
+; KNL-NEXT:    # kill: def $edi killed $edi def $rdi
+; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1
+; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    movb -24(%rsp,%rdi,8), %al
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: movmsk_v2i64_var:
+; SKX:       # %bb.0:
+; SKX-NEXT:    # kill: def $edi killed $edi def $rdi
+; SKX-NEXT:    vpcmpneqq %xmm1, %xmm0, %k0
+; SKX-NEXT:    vpmovm2q %k0, %xmm0
+; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    movb -24(%rsp,%rdi,8), %al
+; SKX-NEXT:    retq
+  %cmp = icmp ne <2 x i64> %x, %y ; lane-wise not-equal; the non-AVX512 expectations above compute equality and then invert it with an all-ones xor
+  %val = extractelement <2 x i1> %cmp, i32 %z ; run-time index: the expectations above spill the mask, clamp %z with 'andl $1' and load a byte at an 8-byte stride
+  ret i1 %val
+}
+
+define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) { ; returns lane %z of the per-float unordered-or-equal mask
+; SSE-LABEL: movmsk_v4f32_var:
+; SSE:       # %bb.0:
+; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
+; SSE-NEXT:    movaps %xmm0, %xmm2
+; SSE-NEXT:    cmpeqps %xmm1, %xmm2
+; SSE-NEXT:    cmpunordps %xmm1, %xmm0
+; SSE-NEXT:    orps %xmm2, %xmm0
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    andl $3, %edi
+; SSE-NEXT:    movb -24(%rsp,%rdi,4), %al
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: movmsk_v4f32_var:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT:    vcmpeq_uqps %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT:    andl $3, %edi
+; AVX1OR2-NEXT:    movb -24(%rsp,%rdi,4), %al
+; AVX1OR2-NEXT:    retq
+;
+; KNL-LABEL: movmsk_v4f32_var:
+; KNL:       # %bb.0:
+; KNL-NEXT:    # kill: def $edi killed $edi def $rdi
+; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT:    vcmpeq_uqps %zmm1, %zmm0, %k1
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $3, %edi
+; KNL-NEXT:    movb -24(%rsp,%rdi,4), %al
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: movmsk_v4f32_var:
+; SKX:       # %bb.0:
+; SKX-NEXT:    # kill: def $edi killed $edi def $rdi
+; SKX-NEXT:    vcmpeq_uqps %xmm1, %xmm0, %k0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $3, %edi
+; SKX-NEXT:    movb -24(%rsp,%rdi,4), %al
+; SKX-NEXT:    retq
+  %cmp = fcmp ueq <4 x float> %x, %y ; unordered-or-equal; the pre-AVX expectations above build it as cmpeqps OR cmpunordps, the others use a single eq_uq compare
+  %val = extractelement <4 x i1> %cmp, i32 %z ; run-time index: the expectations above spill the mask, clamp %z with 'andl $3' and load a byte at a 4-byte stride
+  ret i1 %val
+}
+
+define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) { ; returns lane %z of the per-double ordered (%x >= %y) mask
+; SSE-LABEL: movmsk_v2f64_var:
+; SSE:       # %bb.0:
+; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
+; SSE-NEXT:    cmplepd %xmm0, %xmm1
+; SSE-NEXT:    movapd %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    andl $1, %edi
+; SSE-NEXT:    movb -24(%rsp,%rdi,8), %al
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: movmsk_v2f64_var:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT:    vmovapd %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT:    andl $1, %edi
+; AVX1OR2-NEXT:    movb -24(%rsp,%rdi,8), %al
+; AVX1OR2-NEXT:    retq
+;
+; KNL-LABEL: movmsk_v2f64_var:
+; KNL:       # %bb.0:
+; KNL-NEXT:    # kill: def $edi killed $edi def $rdi
+; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT:    vcmplepd %zmm0, %zmm1, %k1
+; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    movb -24(%rsp,%rdi,8), %al
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: movmsk_v2f64_var:
+; SKX:       # %bb.0:
+; SKX-NEXT:    # kill: def $edi killed $edi def $rdi
+; SKX-NEXT:    vcmplepd %xmm0, %xmm1, %k0
+; SKX-NEXT:    vpmovm2q %k0, %xmm0
+; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    movb -24(%rsp,%rdi,8), %al
+; SKX-NEXT:    retq
+  %cmp = fcmp oge <2 x double> %x, %y ; ordered greater-or-equal; the expectations above show it emitted as a less-or-equal compare with the operands swapped
+  %val = extractelement <2 x i1> %cmp, i32 %z ; run-time index: the expectations above spill the mask, clamp %z with 'andl $1' and load a byte at an 8-byte stride
+  ret i1 %val
+}
+
 define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
 ; SSE-LABEL: PR39665_c_ray:
 ; SSE:       # %bb.0:


        


More information about the llvm-commits mailing list