[llvm] [X86] Expand i512 shifts on AVX512 targets (PR #183198)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 6 01:12:35 PST 2026
================
@@ -3474,78 +2691,42 @@ define i64 @ashr_extract_idx_load_i512_i64(ptr %p0, i512 %a1) nounwind {
;
; AVX512F-LABEL: ashr_extract_idx_load_i512_i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rax
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: vmovaps 32(%rdi), %xmm1
-; AVX512F-NEXT: movq 48(%rdi), %rax
-; AVX512F-NEXT: movq 56(%rdi), %rcx
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: sarq $63, %rcx
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT: andl $7, %esi
-; AVX512F-NEXT: movq -128(%rsp,%rsi,8), %rax
-; AVX512F-NEXT: popq %rcx
+; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512F-NEXT: vpsraq $63, %zmm0, %zmm1
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7]
+; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT: movl $-1, %eax
+; AVX512F-NEXT: shlxl %esi, %eax, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vpcompressq %zmm0, %zmm1 {%k1}
+; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: ashr_extract_idx_load_i512_i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: pushq %rax
-; AVX512VL-NEXT: vmovups (%rdi), %ymm0
-; AVX512VL-NEXT: vmovaps 32(%rdi), %xmm1
-; AVX512VL-NEXT: movq 48(%rdi), %rax
-; AVX512VL-NEXT: movq 56(%rdi), %rcx
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: sarq $63, %rcx
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: andl $7, %esi
-; AVX512VL-NEXT: movq -128(%rsp,%rsi,8), %rax
-; AVX512VL-NEXT: popq %rcx
+; AVX512VL-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512VL-NEXT: vpsraq $63, %zmm0, %zmm1
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7]
+; AVX512VL-NEXT: vpermq %zmm1, %zmm2, %zmm1
+; AVX512VL-NEXT: movl $-1, %eax
+; AVX512VL-NEXT: shlxl %esi, %eax, %eax
+; AVX512VL-NEXT: kmovd %eax, %k1
+; AVX512VL-NEXT: vpcompressq %zmm0, %zmm1 {%k1}
+; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: ashr_extract_idx_load_i512_i64:
; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: pushq %rax
-; AVX512VBMI-NEXT: vmovups (%rdi), %ymm0
-; AVX512VBMI-NEXT: vmovaps 32(%rdi), %xmm1
-; AVX512VBMI-NEXT: movq 48(%rdi), %rax
-; AVX512VBMI-NEXT: movq 56(%rdi), %rcx
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: sarq $63, %rcx
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: andl $7, %esi
-; AVX512VBMI-NEXT: movq -128(%rsp,%rsi,8), %rax
-; AVX512VBMI-NEXT: popq %rcx
+; AVX512VBMI-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512VBMI-NEXT: vpsraq $63, %zmm0, %zmm1
+; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7]
+; AVX512VBMI-NEXT: vpermq %zmm1, %zmm2, %zmm1
----------------
RKSimon wrote:
I agree. Unfortunately, compress/expand are difficult to work with in SimplifyDemandedBits/SimplifyDemandedVectorElts — even if the mask had been constant — I'll see what I can do in follow-ups.
https://github.com/llvm/llvm-project/pull/183198
More information about the llvm-commits
mailing list