[llvm] abb9cbb - [X86][SSE] Add tests for non-constant bool vector extractions
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 19 06:28:45 PDT 2022
Author: Simon Pilgrim
Date: 2022-03-19T13:25:21Z
New Revision: abb9cbb22e993bb8af1be592f527eae3c48ebcbd
URL: https://github.com/llvm/llvm-project/commit/abb9cbb22e993bb8af1be592f527eae3c48ebcbd
DIFF: https://github.com/llvm/llvm-project/commit/abb9cbb22e993bb8af1be592f527eae3c48ebcbd.diff
LOG: [X86][SSE] Add tests for non-constant bool vector extractions
We should be able to perform this with MOVMSK+TEST/BT instead of spilling the comparison result to the stack.
Added:
Modified:
llvm/test/CodeGen/X86/movmsk-cmp.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 925f0a9eecd4b..955266a782c40 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4259,6 +4259,305 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
ret i1 %u1
}
+; Extract elements from a non-constant index.
+
+define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) {
+; SSE-LABEL: movmsk_v16i8_var:
+; SSE: # %bb.0:
+; SSE-NEXT: # kill: def $edi killed $edi def $rdi
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: andl $15, %edi
+; SSE-NEXT: movb -24(%rsp,%rdi), %al
+; SSE-NEXT: retq
+;
+; AVX1OR2-LABEL: movmsk_v16i8_var:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT: andl $15, %edi
+; AVX1OR2-NEXT: movb -24(%rsp,%rdi), %al
+; AVX1OR2-NEXT: retq
+;
+; KNL-LABEL: movmsk_v16i8_var:
+; KNL: # %bb.0:
+; KNL-NEXT: # kill: def $edi killed $edi def $rdi
+; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: andl $15, %edi
+; KNL-NEXT: movb -24(%rsp,%rdi), %al
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: movmsk_v16i8_var:
+; SKX: # %bb.0:
+; SKX-NEXT: # kill: def $edi killed $edi def $rdi
+; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: andl $15, %edi
+; SKX-NEXT: movb -24(%rsp,%rdi), %al
+; SKX-NEXT: retq
+ %cmp = icmp eq <16 x i8> %x, %y
+ %val = extractelement <16 x i1> %cmp, i32 %z
+ ret i1 %val
+}
+
+define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) {
+; SSE-LABEL: movmsk_v8i16_var:
+; SSE: # %bb.0:
+; SSE-NEXT: # kill: def $edi killed $edi def $rdi
+; SSE-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: andl $7, %edi
+; SSE-NEXT: movb -24(%rsp,%rdi,2), %al
+; SSE-NEXT: retq
+;
+; AVX1OR2-LABEL: movmsk_v8i16_var:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT: andl $7, %edi
+; AVX1OR2-NEXT: movb -24(%rsp,%rdi,2), %al
+; AVX1OR2-NEXT: retq
+;
+; KNL-LABEL: movmsk_v8i16_var:
+; KNL: # %bb.0:
+; KNL-NEXT: # kill: def $edi killed $edi def $rdi
+; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: andl $7, %edi
+; KNL-NEXT: movb -24(%rsp,%rdi,2), %al
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: movmsk_v8i16_var:
+; SKX: # %bb.0:
+; SKX-NEXT: # kill: def $edi killed $edi def $rdi
+; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: andl $7, %edi
+; SKX-NEXT: movb -24(%rsp,%rdi,2), %al
+; SKX-NEXT: retq
+ %cmp = icmp sgt <8 x i16> %x, %y
+ %val = extractelement <8 x i1> %cmp, i32 %z
+ ret i1 %val
+}
+
+define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) {
+; SSE-LABEL: movmsk_v4i32_var:
+; SSE: # %bb.0:
+; SSE-NEXT: # kill: def $edi killed $edi def $rdi
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: andl $3, %edi
+; SSE-NEXT: movb -24(%rsp,%rdi,4), %al
+; SSE-NEXT: retq
+;
+; AVX1OR2-LABEL: movmsk_v4i32_var:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT: andl $3, %edi
+; AVX1OR2-NEXT: movb -24(%rsp,%rdi,4), %al
+; AVX1OR2-NEXT: retq
+;
+; KNL-LABEL: movmsk_v4i32_var:
+; KNL: # %bb.0:
+; KNL-NEXT: # kill: def $edi killed $edi def $rdi
+; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: andl $3, %edi
+; KNL-NEXT: movb -24(%rsp,%rdi,4), %al
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: movmsk_v4i32_var:
+; SKX: # %bb.0:
+; SKX-NEXT: # kill: def $edi killed $edi def $rdi
+; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: andl $3, %edi
+; SKX-NEXT: movb -24(%rsp,%rdi,4), %al
+; SKX-NEXT: retq
+ %cmp = icmp slt <4 x i32> %x, %y
+ %val = extractelement <4 x i1> %cmp, i32 %z
+ ret i1 %val
+}
+
+define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) {
+; SSE2-LABEL: movmsk_v2i64_var:
+; SSE2: # %bb.0:
+; SSE2-NEXT: # kill: def $edi killed $edi def $rdi
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT: andl $1, %edi
+; SSE2-NEXT: movb -24(%rsp,%rdi,8), %al
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: movmsk_v2i64_var:
+; SSE41: # %bb.0:
+; SSE41-NEXT: # kill: def $edi killed $edi def $rdi
+; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT: andl $1, %edi
+; SSE41-NEXT: movb -24(%rsp,%rdi,8), %al
+; SSE41-NEXT: retq
+;
+; AVX1OR2-LABEL: movmsk_v2i64_var:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT: andl $1, %edi
+; AVX1OR2-NEXT: movb -24(%rsp,%rdi,8), %al
+; AVX1OR2-NEXT: retq
+;
+; KNL-LABEL: movmsk_v2i64_var:
+; KNL: # %bb.0:
+; KNL-NEXT: # kill: def $edi killed $edi def $rdi
+; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k1
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: andl $1, %edi
+; KNL-NEXT: movb -24(%rsp,%rdi,8), %al
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: movmsk_v2i64_var:
+; SKX: # %bb.0:
+; SKX-NEXT: # kill: def $edi killed $edi def $rdi
+; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: andl $1, %edi
+; SKX-NEXT: movb -24(%rsp,%rdi,8), %al
+; SKX-NEXT: retq
+ %cmp = icmp ne <2 x i64> %x, %y
+ %val = extractelement <2 x i1> %cmp, i32 %z
+ ret i1 %val
+}
+
+define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) {
+; SSE-LABEL: movmsk_v4f32_var:
+; SSE: # %bb.0:
+; SSE-NEXT: # kill: def $edi killed $edi def $rdi
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: cmpeqps %xmm1, %xmm2
+; SSE-NEXT: cmpunordps %xmm1, %xmm0
+; SSE-NEXT: orps %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: andl $3, %edi
+; SSE-NEXT: movb -24(%rsp,%rdi,4), %al
+; SSE-NEXT: retq
+;
+; AVX1OR2-LABEL: movmsk_v4f32_var:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT: andl $3, %edi
+; AVX1OR2-NEXT: movb -24(%rsp,%rdi,4), %al
+; AVX1OR2-NEXT: retq
+;
+; KNL-LABEL: movmsk_v4f32_var:
+; KNL: # %bb.0:
+; KNL-NEXT: # kill: def $edi killed $edi def $rdi
+; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k1
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: andl $3, %edi
+; KNL-NEXT: movb -24(%rsp,%rdi,4), %al
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: movmsk_v4f32_var:
+; SKX: # %bb.0:
+; SKX-NEXT: # kill: def $edi killed $edi def $rdi
+; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: andl $3, %edi
+; SKX-NEXT: movb -24(%rsp,%rdi,4), %al
+; SKX-NEXT: retq
+ %cmp = fcmp ueq <4 x float> %x, %y
+ %val = extractelement <4 x i1> %cmp, i32 %z
+ ret i1 %val
+}
+
+define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
+; SSE-LABEL: movmsk_v2f64_var:
+; SSE: # %bb.0:
+; SSE-NEXT: # kill: def $edi killed $edi def $rdi
+; SSE-NEXT: cmplepd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: andl $1, %edi
+; SSE-NEXT: movb -24(%rsp,%rdi,8), %al
+; SSE-NEXT: retq
+;
+; AVX1OR2-LABEL: movmsk_v2f64_var:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vmovapd %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1OR2-NEXT: andl $1, %edi
+; AVX1OR2-NEXT: movb -24(%rsp,%rdi,8), %al
+; AVX1OR2-NEXT: retq
+;
+; KNL-LABEL: movmsk_v2f64_var:
+; KNL: # %bb.0:
+; KNL-NEXT: # kill: def $edi killed $edi def $rdi
+; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k1
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: andl $1, %edi
+; KNL-NEXT: movb -24(%rsp,%rdi,8), %al
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: movmsk_v2f64_var:
+; SKX: # %bb.0:
+; SKX-NEXT: # kill: def $edi killed $edi def $rdi
+; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: andl $1, %edi
+; SKX-NEXT: movb -24(%rsp,%rdi,8), %al
+; SKX-NEXT: retq
+ %cmp = fcmp oge <2 x double> %x, %y
+ %val = extractelement <2 x i1> %cmp, i32 %z
+ ret i1 %val
+}
+
define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: PR39665_c_ray:
; SSE: # %bb.0:
More information about the llvm-commits mailing list