[llvm] [X86] bitcnt-big-integer.ll - add additional test coverage where the source values are bitcast from vectors (PR #171481)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 9 09:49:52 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
---
The patch is 63.72 KiB and has been truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/171481.diff
1 File Affected:
- (modified) llvm/test/CodeGen/X86/bitcnt-big-integer.ll (+1634-22)
``````````diff
diff --git a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
index 0fd555991ae29..749b3ddc96d0d 100644
--- a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
@@ -52,6 +52,63 @@ define i32 @load_ctpop_i128(ptr %p0) nounwind {
ret i32 %res
}
+define i32 @vector_ctpop_i128(<4 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctpop_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: pextrq $1, %xmm0, %rcx
+; SSE-NEXT: popcntq %rcx, %rcx
+; SSE-NEXT: popcntq %rax, %rax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: vector_ctpop_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: vmovq %xmm0, %rcx
+; AVX2-NEXT: popcntq %rax, %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq %rcx, %rax
+; AVX2-NEXT: addl %edx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: vector_ctpop_i128:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vmovq %xmm0, %rcx
+; AVX512F-NEXT: popcntq %rax, %rdx
+; AVX512F-NEXT: popcntq %rcx, %rax
+; AVX512F-NEXT: addl %edx, %eax
+; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: vector_ctpop_i128:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512VL-NEXT: popcntq %rcx, %rcx
+; AVX512VL-NEXT: popcntq %rax, %rax
+; AVX512VL-NEXT: addl %ecx, %eax
+; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT: retq
+;
+; AVX512POPCNT-LABEL: vector_ctpop_i128:
+; AVX512POPCNT: # %bb.0:
+; AVX512POPCNT-NEXT: vmovq %xmm0, %rax
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512POPCNT-NEXT: popcntq %rcx, %rcx
+; AVX512POPCNT-NEXT: popcntq %rax, %rax
+; AVX512POPCNT-NEXT: addl %ecx, %eax
+; AVX512POPCNT-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT: retq
+ %a0 = bitcast <4 x i32> %v0 to i128
+ %cnt = call i128 @llvm.ctpop.i128(i128 %a0)
+ %res = trunc i128 %cnt to i32
+ ret i32 %res
+}
+
define i32 @test_ctpop_i256(i256 %a0) nounwind {
; CHECK-LABEL: test_ctpop_i256:
; CHECK: # %bb.0:
@@ -183,6 +240,107 @@ define i32 @load_ctpop_i256(ptr %p0) nounwind {
ret i32 %res
}
+define i32 @vector_ctpop_i256(<8 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctpop_i256:
+; SSE: # %bb.0:
+; SSE-NEXT: pextrq $1, %xmm0, %rax
+; SSE-NEXT: movq %xmm0, %rcx
+; SSE-NEXT: movq %xmm1, %rdx
+; SSE-NEXT: pextrq $1, %xmm1, %rsi
+; SSE-NEXT: popcntq %rsi, %rsi
+; SSE-NEXT: popcntq %rdx, %rdx
+; SSE-NEXT: addl %esi, %edx
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: popcntq %rax, %rsi
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: popcntq %rcx, %rax
+; SSE-NEXT: addl %esi, %eax
+; SSE-NEXT: addl %edx, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: vector_ctpop_i256:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: vmovq %xmm0, %rcx
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpextrq $1, %xmm0, %rdx
+; AVX2-NEXT: vmovq %xmm0, %rsi
+; AVX2-NEXT: popcntq %rdx, %rdx
+; AVX2-NEXT: popcntq %rsi, %rsi
+; AVX2-NEXT: addl %edx, %esi
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: popcntq %rax, %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: popcntq %rcx, %rax
+; AVX2-NEXT: addl %edx, %eax
+; AVX2-NEXT: addl %esi, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: vector_ctpop_i256:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vmovq %xmm0, %rcx
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx
+; AVX512F-NEXT: vmovq %xmm0, %rsi
+; AVX512F-NEXT: popcntq %rdx, %rdx
+; AVX512F-NEXT: popcntq %rsi, %rsi
+; AVX512F-NEXT: addl %edx, %esi
+; AVX512F-NEXT: popcntq %rax, %rdx
+; AVX512F-NEXT: popcntq %rcx, %rax
+; AVX512F-NEXT: addl %edx, %eax
+; AVX512F-NEXT: addl %esi, %eax
+; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: vector_ctpop_i256:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512VL-NEXT: vmovq %xmm0, %rcx
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rdx
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512VL-NEXT: popcntq %rsi, %rsi
+; AVX512VL-NEXT: popcntq %rdx, %rdx
+; AVX512VL-NEXT: addl %esi, %edx
+; AVX512VL-NEXT: xorl %esi, %esi
+; AVX512VL-NEXT: popcntq %rax, %rsi
+; AVX512VL-NEXT: xorl %eax, %eax
+; AVX512VL-NEXT: popcntq %rcx, %rax
+; AVX512VL-NEXT: addl %esi, %eax
+; AVX512VL-NEXT: addl %edx, %eax
+; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512POPCNT-LABEL: vector_ctpop_i256:
+; AVX512POPCNT: # %bb.0:
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512POPCNT-NEXT: vmovq %xmm0, %rcx
+; AVX512POPCNT-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512POPCNT-NEXT: vmovq %xmm0, %rdx
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512POPCNT-NEXT: popcntq %rsi, %rsi
+; AVX512POPCNT-NEXT: popcntq %rdx, %rdx
+; AVX512POPCNT-NEXT: addl %esi, %edx
+; AVX512POPCNT-NEXT: xorl %esi, %esi
+; AVX512POPCNT-NEXT: popcntq %rax, %rsi
+; AVX512POPCNT-NEXT: xorl %eax, %eax
+; AVX512POPCNT-NEXT: popcntq %rcx, %rax
+; AVX512POPCNT-NEXT: addl %esi, %eax
+; AVX512POPCNT-NEXT: addl %edx, %eax
+; AVX512POPCNT-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT: vzeroupper
+; AVX512POPCNT-NEXT: retq
+ %a0 = bitcast <8 x i32> %v0 to i256
+ %cnt = call i256 @llvm.ctpop.i256(i256 %a0)
+ %res = trunc i256 %cnt to i32
+ ret i32 %res
+}
+
define i32 @test_ctpop_i512(i512 %a0) nounwind {
; CHECK-LABEL: test_ctpop_i512:
; CHECK: # %bb.0:
@@ -404,6 +562,166 @@ define i32 @load_ctpop_i512(ptr %p0) nounwind {
ret i32 %res
}
+define i32 @vector_ctpop_i512(<16 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctpop_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: pextrq $1, %xmm0, %rcx
+; SSE-NEXT: movq %xmm1, %rdx
+; SSE-NEXT: pextrq $1, %xmm1, %rsi
+; SSE-NEXT: pextrq $1, %xmm2, %rdi
+; SSE-NEXT: movq %xmm2, %r8
+; SSE-NEXT: movq %xmm3, %r9
+; SSE-NEXT: pextrq $1, %xmm3, %r10
+; SSE-NEXT: popcntq %r10, %r10
+; SSE-NEXT: popcntq %r9, %r9
+; SSE-NEXT: addl %r10d, %r9d
+; SSE-NEXT: popcntq %rdi, %rdi
+; SSE-NEXT: popcntq %r8, %r8
+; SSE-NEXT: addl %edi, %r8d
+; SSE-NEXT: addl %r9d, %r8d
+; SSE-NEXT: popcntq %rsi, %rsi
+; SSE-NEXT: popcntq %rdx, %rdx
+; SSE-NEXT: addl %esi, %edx
+; SSE-NEXT: popcntq %rcx, %rcx
+; SSE-NEXT: popcntq %rax, %rax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: addl %edx, %eax
+; SSE-NEXT: addl %r8d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: vector_ctpop_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, %rdx
+; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX2-NEXT: vpextrq $1, %xmm1, %rdi
+; AVX2-NEXT: vmovq %xmm1, %r8
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrq $1, %xmm0, %r9
+; AVX2-NEXT: vmovq %xmm0, %r10
+; AVX2-NEXT: popcntq %r9, %r9
+; AVX2-NEXT: popcntq %r10, %r10
+; AVX2-NEXT: addl %r9d, %r10d
+; AVX2-NEXT: popcntq %rdi, %rdi
+; AVX2-NEXT: popcntq %r8, %r8
+; AVX2-NEXT: addl %edi, %r8d
+; AVX2-NEXT: addl %r10d, %r8d
+; AVX2-NEXT: popcntq %rsi, %rsi
+; AVX2-NEXT: popcntq %rdx, %rdx
+; AVX2-NEXT: addl %esi, %edx
+; AVX2-NEXT: popcntq %rcx, %rcx
+; AVX2-NEXT: popcntq %rax, %rax
+; AVX2-NEXT: addl %ecx, %eax
+; AVX2-NEXT: addl %edx, %eax
+; AVX2-NEXT: addl %r8d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: vector_ctpop_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vmovq %xmm1, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm1, %rcx
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx
+; AVX512F-NEXT: vmovq %xmm0, %rsi
+; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; AVX512F-NEXT: vpextrq $1, %xmm1, %rdi
+; AVX512F-NEXT: vmovq %xmm1, %r8
+; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512F-NEXT: vpextrq $1, %xmm0, %r9
+; AVX512F-NEXT: vmovq %xmm0, %r10
+; AVX512F-NEXT: popcntq %r9, %r9
+; AVX512F-NEXT: popcntq %r10, %r10
+; AVX512F-NEXT: addl %r9d, %r10d
+; AVX512F-NEXT: popcntq %rdi, %rdi
+; AVX512F-NEXT: popcntq %r8, %r8
+; AVX512F-NEXT: addl %edi, %r8d
+; AVX512F-NEXT: addl %r10d, %r8d
+; AVX512F-NEXT: popcntq %rdx, %rdx
+; AVX512F-NEXT: popcntq %rsi, %rsi
+; AVX512F-NEXT: addl %edx, %esi
+; AVX512F-NEXT: popcntq %rcx, %rcx
+; AVX512F-NEXT: popcntq %rax, %rax
+; AVX512F-NEXT: addl %ecx, %eax
+; AVX512F-NEXT: addl %esi, %eax
+; AVX512F-NEXT: addl %r8d, %eax
+; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: vector_ctpop_i512:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vmovq %xmm1, %rax
+; AVX512VL-NEXT: vpextrq $1, %xmm1, %rcx
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %rdx
+; AVX512VL-NEXT: vmovq %xmm0, %rsi
+; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; AVX512VL-NEXT: vmovq %xmm1, %rdi
+; AVX512VL-NEXT: vpextrq $1, %xmm1, %r8
+; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %r9
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %r10
+; AVX512VL-NEXT: popcntq %r10, %r10
+; AVX512VL-NEXT: popcntq %r9, %r9
+; AVX512VL-NEXT: addl %r10d, %r9d
+; AVX512VL-NEXT: popcntq %r8, %r8
+; AVX512VL-NEXT: popcntq %rdi, %rdi
+; AVX512VL-NEXT: addl %r8d, %edi
+; AVX512VL-NEXT: addl %r9d, %edi
+; AVX512VL-NEXT: popcntq %rdx, %rdx
+; AVX512VL-NEXT: popcntq %rsi, %rsi
+; AVX512VL-NEXT: addl %edx, %esi
+; AVX512VL-NEXT: popcntq %rcx, %rcx
+; AVX512VL-NEXT: popcntq %rax, %rax
+; AVX512VL-NEXT: addl %ecx, %eax
+; AVX512VL-NEXT: addl %esi, %eax
+; AVX512VL-NEXT: addl %edi, %eax
+; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512POPCNT-LABEL: vector_ctpop_i512:
+; AVX512POPCNT: # %bb.0:
+; AVX512POPCNT-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512POPCNT-NEXT: vmovq %xmm1, %rax
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm1, %rcx
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm0, %rdx
+; AVX512POPCNT-NEXT: vmovq %xmm0, %rsi
+; AVX512POPCNT-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; AVX512POPCNT-NEXT: vmovq %xmm1, %rdi
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm1, %r8
+; AVX512POPCNT-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512POPCNT-NEXT: vmovq %xmm0, %r9
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm0, %r10
+; AVX512POPCNT-NEXT: popcntq %r10, %r10
+; AVX512POPCNT-NEXT: popcntq %r9, %r9
+; AVX512POPCNT-NEXT: addl %r10d, %r9d
+; AVX512POPCNT-NEXT: popcntq %r8, %r8
+; AVX512POPCNT-NEXT: popcntq %rdi, %rdi
+; AVX512POPCNT-NEXT: addl %r8d, %edi
+; AVX512POPCNT-NEXT: addl %r9d, %edi
+; AVX512POPCNT-NEXT: popcntq %rdx, %rdx
+; AVX512POPCNT-NEXT: popcntq %rsi, %rsi
+; AVX512POPCNT-NEXT: addl %edx, %esi
+; AVX512POPCNT-NEXT: popcntq %rcx, %rcx
+; AVX512POPCNT-NEXT: popcntq %rax, %rax
+; AVX512POPCNT-NEXT: addl %ecx, %eax
+; AVX512POPCNT-NEXT: addl %esi, %eax
+; AVX512POPCNT-NEXT: addl %edi, %eax
+; AVX512POPCNT-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT: vzeroupper
+; AVX512POPCNT-NEXT: retq
+ %a0 = bitcast <16 x i32> %v0 to i512
+ %cnt = call i512 @llvm.ctpop.i512(i512 %a0)
+ %res = trunc i512 %cnt to i32
+ ret i32 %res
+}
+
define i32 @test_ctpop_i1024(i1024 %a0) nounwind {
; SSE-LABEL: test_ctpop_i1024:
; SSE: # %bb.0:
@@ -969,6 +1287,75 @@ define i32 @load_ctlz_i128(ptr %p0) nounwind {
ret i32 %res
}
+define i32 @vector_ctlz_i128(<4 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctlz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movq %xmm0, %rcx
+; SSE-NEXT: pextrq $1, %xmm0, %rdx
+; SSE-NEXT: bsrq %rdx, %rsi
+; SSE-NEXT: xorl $63, %esi
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq %rcx, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: vector_ctlz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX2-NEXT: lzcntq %rcx, %rdx
+; AVX2-NEXT: lzcntq %rax, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %edx, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: vector_ctlz_i128:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512F-NEXT: lzcntq %rcx, %rdx
+; AVX512F-NEXT: lzcntq %rax, %rax
+; AVX512F-NEXT: addl $64, %eax
+; AVX512F-NEXT: testq %rcx, %rcx
+; AVX512F-NEXT: cmovnel %edx, %eax
+; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: vector_ctlz_i128:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: lzcntq %rcx, %rdx
+; AVX512VL-NEXT: lzcntq %rax, %rax
+; AVX512VL-NEXT: addl $64, %eax
+; AVX512VL-NEXT: testq %rcx, %rcx
+; AVX512VL-NEXT: cmovnel %edx, %eax
+; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT: retq
+;
+; AVX512POPCNT-LABEL: vector_ctlz_i128:
+; AVX512POPCNT: # %bb.0:
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512POPCNT-NEXT: vmovq %xmm0, %rax
+; AVX512POPCNT-NEXT: lzcntq %rcx, %rdx
+; AVX512POPCNT-NEXT: lzcntq %rax, %rax
+; AVX512POPCNT-NEXT: addl $64, %eax
+; AVX512POPCNT-NEXT: testq %rcx, %rcx
+; AVX512POPCNT-NEXT: cmovnel %edx, %eax
+; AVX512POPCNT-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT: retq
+ %a0 = bitcast <4 x i32> %v0 to i128
+ %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0)
+ %res = trunc i128 %cnt to i32
+ ret i32 %res
+}
+
define i32 @test_ctlz_i256(i256 %a0) nounwind {
; SSE-LABEL: test_ctlz_i256:
; SSE: # %bb.0:
@@ -1125,6 +1512,135 @@ define i32 @load_ctlz_i256(ptr %p0) nounwind {
ret i32 %res
}
+define i32 @vector_ctlz_i256(<8 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctlz_i256:
+; SSE: # %bb.0:
+; SSE-NEXT: movq %xmm0, %rcx
+; SSE-NEXT: pextrq $1, %xmm0, %rdx
+; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pextrq $1, %xmm1, %rsi
+; SSE-NEXT: bsrq %rsi, %rdi
+; SSE-NEXT: xorl $63, %edi
+; SSE-NEXT: bsrq %rax, %r8
+; SSE-NEXT: xorl $63, %r8d
+; SSE-NEXT: orl $64, %r8d
+; SSE-NEXT: testq %rsi, %rsi
+; SSE-NEXT: cmovnel %edi, %r8d
+; SSE-NEXT: bsrq %rdx, %rsi
+; SSE-NEXT: xorl $63, %esi
+; SSE-NEXT: movl $127, %eax
+; SSE-NEXT: bsrq %rcx, %rax
+; SSE-NEXT: xorl $63, %eax
+; SSE-NEXT: addl $64, %eax
+; SSE-NEXT: testq %rdx, %rdx
+; SSE-NEXT: cmovnel %esi, %eax
+; SSE-NEXT: subl $-128, %eax
+; SSE-NEXT: ptest %xmm1, %xmm1
+; SSE-NEXT: cmovnel %r8d, %eax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: vector_ctlz_i256:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, %rdx
+; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX2-NEXT: lzcntq %rsi, %rdi
+; AVX2-NEXT: lzcntq %rdx, %r8
+; AVX2-NEXT: addl $64, %r8d
+; AVX2-NEXT: testq %rsi, %rsi
+; AVX2-NEXT: cmovnel %edi, %r8d
+; AVX2-NEXT: xorl %edi, %edi
+; AVX2-NEXT: lzcntq %rcx, %rdi
+; AVX2-NEXT: lzcntq %rax, %rax
+; AVX2-NEXT: addl $64, %eax
+; AVX2-NEXT: testq %rcx, %rcx
+; AVX2-NEXT: cmovnel %edi, %eax
+; AVX2-NEXT: subl $-128, %eax
+; AVX2-NEXT: orq %rsi, %rdx
+; AVX2-NEXT: cmovnel %r8d, %eax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: vector_ctlz_i256:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, %rdx
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512F-NEXT: lzcntq %rsi, %rdi
+; AVX512F-NEXT: lzcntq %rdx, %r8
+; AVX512F-NEXT: addl $64, %r8d
+; AVX512F-NEXT: testq %rsi, %rsi
+; AVX512F-NEXT: cmovnel %edi, %r8d
+; AVX512F-NEXT: lzcntq %rcx, %rdi
+; AVX512F-NEXT: lzcntq %rax, %rax
+; AVX512F-NEXT: addl $64, %eax
+; AVX512F-NEXT: testq %rcx, %rcx
+; AVX512F-NEXT: cmovnel %edi, %eax
+; AVX512F-NEXT: subl $-128, %eax
+; AVX512F-NEXT: orq %rsi, %rdx
+; AVX512F-NEXT: cmovnel %r8d, %eax
+; AVX512F-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: vector_ctlz_i256:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rdx
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512VL-NEXT: lzcntq %rsi, %rdi
+; AVX512VL-NEXT: lzcntq %rdx, %r8
+; AVX512VL-NEXT: addl $64, %r8d
+; AVX512VL-NEXT: testq %rsi, %rsi
+; AVX512VL-NEXT: cmovnel %edi, %r8d
+; AVX512VL-NEXT: lzcntq %rcx, %rdi
+; AVX512VL-NEXT: lzcntq %rax, %rax
+; AVX512VL-NEXT: addl $64, %eax
+; AVX512VL-NEXT: testq %rcx, %rcx
+; AVX512VL-NEXT: cmovnel %edi, %eax
+; AVX512VL-NEXT: subl $-128, %eax
+; AVX512VL-NEXT: orq %rsi, %rdx
+; AVX512VL-NEXT: cmovnel %r8d, %eax
+; AVX512VL-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512POPCNT-LABEL: vector_ctlz_i256:
+; AVX512POPCNT: # %bb.0:
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512POPCNT-NEXT: vmovq %xmm0, %rax
+; AVX512POPCNT-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512POPCNT-NEXT: vmovq %xmm0, %rdx
+; AVX512POPCNT-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512POPCNT-NEXT: lzcntq %rsi, %rdi
+; AVX512POPCNT-NEXT: lzcntq %rdx, %r8
+; AVX512POPCNT-NEXT: addl $64, %r8d
+; AVX512POPCNT-NEXT: testq %rsi, %rsi
+; AVX512POPCNT-NEXT: cmovnel %edi, %r8d
+; AVX512POPCNT-NEXT: lzcntq %rcx, %rdi
+; AVX512POPCNT-NEXT: lzcntq %rax, %rax
+; AVX512POPCNT-NEXT: addl $64, %eax
+; AVX512POPCNT-NEXT: testq %rcx, %rcx
+; AVX512POPCNT-NEXT: cmovnel %edi, %eax
+; AVX512POPCNT-NEXT: subl $-128, %eax
+; AVX512POPCNT-NEXT: orq %rsi, %rdx
+; AVX512POPCNT-NEXT: cmovnel %r8d, %eax
+; AVX512POPCNT-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT: vzeroupper
+; AVX512POPCNT-NEXT: retq
+ %a0 = bitcast <8 x i32> %v0 to i256
+ %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0)
+ %res = trunc i256 %cnt to i32
+ ret i32 %res
+}
+
define i32 @test_ctlz_i512(i512 %a0) nounwind {
; SSE-LABEL: test_ctlz_i512:
; SSE: # %bb.0:
@@ -1423,10 +1939,155 @@ define i32 @load_ctlz_i512(ptr %p0) nounwind {
; AVX2-NEXT: popq %r15
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_ctlz_i512:
+; AVX512F-LABEL: load_ctlz_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,6,5,4,3,2,1,0]
+; AVX512F-NEXT: vpermq (%rdi), %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vplzcntq %zmm0, %zmm0
+; AVX512F-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm1 = [512,512,512,512,512,512,5...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/171481
More information about the llvm-commits mailing list