[llvm] [X86] shift-i512.ll - extend test coverage (PR #171125)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 05:46:52 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Remove the v8i64 dependency from the original shift-by-1 tests - this was added for #132601 but is unlikely to be necessary.
Add tests for general shifts, as well as shift-by-constant and shift-of-constant examples (sketched below).
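For context, the new coverage follows patterns like the LLVM IR sketches below. These are illustrative only - the function names and shift amounts are hypothetical, not copied from the patch; the full autogenerated checks are in the (truncated) diff that follows.

```llvm
; General variable shift - the form shown in full in the diff below.
define i512 @shl_i512(i512 %a0, i512 %a1) nounwind {
  %r = shl i512 %a0, %a1
  ret i512 %r
}

; Shift-by-constant: the shift amount is an immediate.
; (73 is an arbitrary illustrative amount.)
define i512 @lshr_i512_by_const(i512 %a0) nounwind {
  %r = lshr i512 %a0, 73
  ret i512 %r
}

; Shift-of-constant: a constant value shifted by a variable amount.
define i512 @shl_i512_of_const(i512 %a0) nounwind {
  %r = shl i512 1, %a0
  ret i512 %r
}
```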
---
Patch is 86.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171125.diff
1 file affected:
- (modified) llvm/test/CodeGen/X86/shift-i512.ll (+2024-182)
``````````diff
diff --git a/llvm/test/CodeGen/X86/shift-i512.ll b/llvm/test/CodeGen/X86/shift-i512.ll
index 03b61d9235254..4d341f1b31027 100644
--- a/llvm/test/CodeGen/X86/shift-i512.ll
+++ b/llvm/test/CodeGen/X86/shift-i512.ll
@@ -1,208 +1,2050 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s -check-prefixes=AVX512VL
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+avx512vbmi2 | FileCheck %s -check-prefixes=AVX512VBMI
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s -check-prefixes=ZNVER4
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s -check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s -check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s -check-prefixes=CHECK,AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=knl | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 -mattr=+avx512vbmi2 | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512VBMI
-; i512 shifts hidden inside 512-bit vectors.
+define i512 @shl_i512(i512 %a0, i512 %a1) nounwind {
+; SSE-LABEL: shl_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $63, %ecx
+; SSE-NEXT: shrl $3, %eax
+; SSE-NEXT: andl $56, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: cltq
+; SSE-NEXT: movq -56(%rsp,%rax), %rdx
+; SSE-NEXT: movq -48(%rsp,%rax), %r9
+; SSE-NEXT: movq %r9, %rsi
+; SSE-NEXT: shldq %cl, %rdx, %rsi
+; SSE-NEXT: movq -40(%rsp,%rax), %r10
+; SSE-NEXT: movq %r10, %r8
+; SSE-NEXT: shldq %cl, %r9, %r8
+; SSE-NEXT: movq -32(%rsp,%rax), %r9
+; SSE-NEXT: movq %r9, %r11
+; SSE-NEXT: shldq %cl, %r10, %r11
+; SSE-NEXT: movq -24(%rsp,%rax), %r10
+; SSE-NEXT: movq %r10, %rbx
+; SSE-NEXT: shldq %cl, %r9, %rbx
+; SSE-NEXT: movq -16(%rsp,%rax), %r9
+; SSE-NEXT: movq %r9, %r14
+; SSE-NEXT: shldq %cl, %r10, %r14
+; SSE-NEXT: movq -8(%rsp,%rax), %r10
+; SSE-NEXT: shldq %cl, %r9, %r10
+; SSE-NEXT: movq -64(%rsp,%rax), %rax
+; SSE-NEXT: movq %rax, %r9
+; SSE-NEXT: shlq %cl, %r9
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: movq %r10, 56(%rdi)
+; SSE-NEXT: movq %r14, 48(%rdi)
+; SSE-NEXT: movq %rbx, 40(%rdi)
+; SSE-NEXT: movq %r11, 32(%rdi)
+; SSE-NEXT: movq %r8, 24(%rdi)
+; SSE-NEXT: movq %rsi, 16(%rdi)
+; SSE-NEXT: movq %rdx, 8(%rdi)
+; SSE-NEXT: movq %r9, (%rdi)
+; SSE-NEXT: addq $8, %rsp
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r14
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: shl_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: pushq %rax
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: andl $63, %ecx
+; AVX2-NEXT: shrl $3, %eax
+; AVX2-NEXT: andl $56, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: movslq %eax, %r8
+; AVX2-NEXT: movq -56(%rsp,%r8), %rdx
+; AVX2-NEXT: movq -48(%rsp,%r8), %rax
+; AVX2-NEXT: movq %rax, %rsi
+; AVX2-NEXT: shldq %cl, %rdx, %rsi
+; AVX2-NEXT: movq -40(%rsp,%r8), %r10
+; AVX2-NEXT: movq %r10, %r9
+; AVX2-NEXT: shldq %cl, %rax, %r9
+; AVX2-NEXT: movq -32(%rsp,%r8), %rax
+; AVX2-NEXT: movq %rax, %r11
+; AVX2-NEXT: shldq %cl, %r10, %r11
+; AVX2-NEXT: movq -24(%rsp,%r8), %r10
+; AVX2-NEXT: movq %r10, %rbx
+; AVX2-NEXT: shldq %cl, %rax, %rbx
+; AVX2-NEXT: movq -16(%rsp,%r8), %rax
+; AVX2-NEXT: movq %rax, %r14
+; AVX2-NEXT: shldq %cl, %r10, %r14
+; AVX2-NEXT: movq -8(%rsp,%r8), %r10
+; AVX2-NEXT: shldq %cl, %rax, %r10
+; AVX2-NEXT: movq %rdi, %rax
+; AVX2-NEXT: movq -64(%rsp,%r8), %rdi
+; AVX2-NEXT: shlxq %rcx, %rdi, %r8
+; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX2-NEXT: shldq %cl, %rdi, %rdx
+; AVX2-NEXT: movq %r10, 56(%rax)
+; AVX2-NEXT: movq %r14, 48(%rax)
+; AVX2-NEXT: movq %rbx, 40(%rax)
+; AVX2-NEXT: movq %r11, 32(%rax)
+; AVX2-NEXT: movq %r9, 24(%rax)
+; AVX2-NEXT: movq %rsi, 16(%rax)
+; AVX2-NEXT: movq %rdx, 8(%rax)
+; AVX2-NEXT: movq %r8, (%rax)
+; AVX2-NEXT: addq $8, %rsp
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: shl_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: pushq %r14
+; AVX512F-NEXT: pushq %rbx
+; AVX512F-NEXT: pushq %rax
+; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
+; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movl %eax, %ecx
+; AVX512F-NEXT: andl $63, %ecx
+; AVX512F-NEXT: shrl $3, %eax
+; AVX512F-NEXT: andl $56, %eax
+; AVX512F-NEXT: negl %eax
+; AVX512F-NEXT: movslq %eax, %r8
+; AVX512F-NEXT: movq -56(%rsp,%r8), %rdx
+; AVX512F-NEXT: movq -48(%rsp,%r8), %rax
+; AVX512F-NEXT: movq %rax, %rsi
+; AVX512F-NEXT: shldq %cl, %rdx, %rsi
+; AVX512F-NEXT: movq -40(%rsp,%r8), %r10
+; AVX512F-NEXT: movq %r10, %r9
+; AVX512F-NEXT: shldq %cl, %rax, %r9
+; AVX512F-NEXT: movq -32(%rsp,%r8), %rax
+; AVX512F-NEXT: movq %rax, %r11
+; AVX512F-NEXT: shldq %cl, %r10, %r11
+; AVX512F-NEXT: movq -24(%rsp,%r8), %r10
+; AVX512F-NEXT: movq %r10, %rbx
+; AVX512F-NEXT: shldq %cl, %rax, %rbx
+; AVX512F-NEXT: movq -16(%rsp,%r8), %rax
+; AVX512F-NEXT: movq %rax, %r14
+; AVX512F-NEXT: shldq %cl, %r10, %r14
+; AVX512F-NEXT: movq -8(%rsp,%r8), %r10
+; AVX512F-NEXT: shldq %cl, %rax, %r10
+; AVX512F-NEXT: movq %rdi, %rax
+; AVX512F-NEXT: movq -64(%rsp,%r8), %rdi
+; AVX512F-NEXT: shlxq %rcx, %rdi, %r8
+; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX512F-NEXT: shldq %cl, %rdi, %rdx
+; AVX512F-NEXT: movq %r10, 56(%rax)
+; AVX512F-NEXT: movq %r14, 48(%rax)
+; AVX512F-NEXT: movq %rbx, 40(%rax)
+; AVX512F-NEXT: movq %r11, 32(%rax)
+; AVX512F-NEXT: movq %r9, 24(%rax)
+; AVX512F-NEXT: movq %rsi, 16(%rax)
+; AVX512F-NEXT: movq %rdx, 8(%rax)
+; AVX512F-NEXT: movq %r8, (%rax)
+; AVX512F-NEXT: addq $8, %rsp
+; AVX512F-NEXT: popq %rbx
+; AVX512F-NEXT: popq %r14
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: shl_i512:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: pushq %r15
+; AVX512VL-NEXT: pushq %r14
+; AVX512VL-NEXT: pushq %rbx
+; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
+; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: movl %eax, %ecx
+; AVX512VL-NEXT: andl $63, %ecx
+; AVX512VL-NEXT: shrl $3, %eax
+; AVX512VL-NEXT: andl $56, %eax
+; AVX512VL-NEXT: negl %eax
+; AVX512VL-NEXT: movslq %eax, %r9
+; AVX512VL-NEXT: movq -56(%rsp,%r9), %rdx
+; AVX512VL-NEXT: movq -48(%rsp,%r9), %rax
+; AVX512VL-NEXT: movq %rax, %rsi
+; AVX512VL-NEXT: shldq %cl, %rdx, %rsi
+; AVX512VL-NEXT: movq -40(%rsp,%r9), %r10
+; AVX512VL-NEXT: movq %r10, %r8
+; AVX512VL-NEXT: shldq %cl, %rax, %r8
+; AVX512VL-NEXT: movq -32(%rsp,%r9), %r11
+; AVX512VL-NEXT: movq %r11, %rbx
+; AVX512VL-NEXT: shldq %cl, %r10, %rbx
+; AVX512VL-NEXT: movq %rdi, %rax
+; AVX512VL-NEXT: movq -24(%rsp,%r9), %rdi
+; AVX512VL-NEXT: movq %rdi, %r10
+; AVX512VL-NEXT: shldq %cl, %r11, %r10
+; AVX512VL-NEXT: movq -64(%rsp,%r9), %r11
+; AVX512VL-NEXT: movq -16(%rsp,%r9), %r14
+; AVX512VL-NEXT: movq %r14, %r15
+; AVX512VL-NEXT: shldq %cl, %rdi, %r15
+; AVX512VL-NEXT: movq -8(%rsp,%r9), %rdi
+; AVX512VL-NEXT: shldq %cl, %r14, %rdi
+; AVX512VL-NEXT: shlxq %rcx, %r11, %r9
+; AVX512VL-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX512VL-NEXT: shldq %cl, %r11, %rdx
+; AVX512VL-NEXT: movq %rdi, 56(%rax)
+; AVX512VL-NEXT: movq %r15, 48(%rax)
+; AVX512VL-NEXT: movq %r10, 40(%rax)
+; AVX512VL-NEXT: movq %rbx, 32(%rax)
+; AVX512VL-NEXT: movq %r8, 24(%rax)
+; AVX512VL-NEXT: movq %rsi, 16(%rax)
+; AVX512VL-NEXT: movq %rdx, 8(%rax)
+; AVX512VL-NEXT: movq %r9, (%rax)
+; AVX512VL-NEXT: popq %rbx
+; AVX512VL-NEXT: popq %r14
+; AVX512VL-NEXT: popq %r15
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512VBMI-LABEL: shl_i512:
+; AVX512VBMI: # %bb.0:
+; AVX512VBMI-NEXT: pushq %r15
+; AVX512VBMI-NEXT: pushq %r14
+; AVX512VBMI-NEXT: pushq %rbx
+; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
+; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movl %eax, %ecx
+; AVX512VBMI-NEXT: andl $63, %ecx
+; AVX512VBMI-NEXT: shrl $3, %eax
+; AVX512VBMI-NEXT: andl $56, %eax
+; AVX512VBMI-NEXT: negl %eax
+; AVX512VBMI-NEXT: movslq %eax, %r9
+; AVX512VBMI-NEXT: movq -56(%rsp,%r9), %rdx
+; AVX512VBMI-NEXT: movq -48(%rsp,%r9), %rax
+; AVX512VBMI-NEXT: movq %rax, %rsi
+; AVX512VBMI-NEXT: shldq %cl, %rdx, %rsi
+; AVX512VBMI-NEXT: movq -40(%rsp,%r9), %r10
+; AVX512VBMI-NEXT: movq %r10, %r8
+; AVX512VBMI-NEXT: shldq %cl, %rax, %r8
+; AVX512VBMI-NEXT: movq -32(%rsp,%r9), %r11
+; AVX512VBMI-NEXT: movq %r11, %rbx
+; AVX512VBMI-NEXT: shldq %cl, %r10, %rbx
+; AVX512VBMI-NEXT: movq %rdi, %rax
+; AVX512VBMI-NEXT: movq -24(%rsp,%r9), %rdi
+; AVX512VBMI-NEXT: movq %rdi, %r10
+; AVX512VBMI-NEXT: shldq %cl, %r11, %r10
+; AVX512VBMI-NEXT: movq -64(%rsp,%r9), %r11
+; AVX512VBMI-NEXT: movq -16(%rsp,%r9), %r14
+; AVX512VBMI-NEXT: movq %r14, %r15
+; AVX512VBMI-NEXT: shldq %cl, %rdi, %r15
+; AVX512VBMI-NEXT: movq -8(%rsp,%r9), %rdi
+; AVX512VBMI-NEXT: shldq %cl, %r14, %rdi
+; AVX512VBMI-NEXT: shlxq %rcx, %r11, %r9
+; AVX512VBMI-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX512VBMI-NEXT: shldq %cl, %r11, %rdx
+; AVX512VBMI-NEXT: movq %rdi, 56(%rax)
+; AVX512VBMI-NEXT: movq %r15, 48(%rax)
+; AVX512VBMI-NEXT: movq %r10, 40(%rax)
+; AVX512VBMI-NEXT: movq %rbx, 32(%rax)
+; AVX512VBMI-NEXT: movq %r8, 24(%rax)
+; AVX512VBMI-NEXT: movq %rsi, 16(%rax)
+; AVX512VBMI-NEXT: movq %rdx, 8(%rax)
+; AVX512VBMI-NEXT: movq %r9, (%rax)
+; AVX512VBMI-NEXT: popq %rbx
+; AVX512VBMI-NEXT: popq %r14
+; AVX512VBMI-NEXT: popq %r15
+; AVX512VBMI-NEXT: vzeroupper
+; AVX512VBMI-NEXT: retq
+ %r = shl i512 %a0, %a1
+ ret i512 %r
+}
-define <8 x i64> @shl_i512_1(<8 x i64> %a) {
-; AVX512VL-LABEL: shl_i512_1:
+define i512 @lshr_i512(i512 %a0, i512 %a1) nounwind {
+; SSE-LABEL: lshr_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $63, %ecx
+; SSE-NEXT: shrl $3, %eax
+; SSE-NEXT: andl $56, %eax
+; SSE-NEXT: movq -112(%rsp,%rax), %rdx
+; SSE-NEXT: movq -120(%rsp,%rax), %r9
+; SSE-NEXT: movq %r9, %rsi
+; SSE-NEXT: shrdq %cl, %rdx, %rsi
+; SSE-NEXT: movq -104(%rsp,%rax), %r8
+; SSE-NEXT: shrdq %cl, %r8, %rdx
+; SSE-NEXT: movq -96(%rsp,%rax), %r10
+; SSE-NEXT: shrdq %cl, %r10, %r8
+; SSE-NEXT: movq -88(%rsp,%rax), %r11
+; SSE-NEXT: shrdq %cl, %r11, %r10
+; SSE-NEXT: movq -80(%rsp,%rax), %rbx
+; SSE-NEXT: shrdq %cl, %rbx, %r11
+; SSE-NEXT: movq -72(%rsp,%rax), %r14
+; SSE-NEXT: shrdq %cl, %r14, %rbx
+; SSE-NEXT: movq -128(%rsp,%rax), %r15
+; SSE-NEXT: shrdq %cl, %r9, %r15
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shrq %cl, %r14
+; SSE-NEXT: movq %r14, 56(%rdi)
+; SSE-NEXT: movq %rbx, 48(%rdi)
+; SSE-NEXT: movq %r11, 40(%rdi)
+; SSE-NEXT: movq %r10, 32(%rdi)
+; SSE-NEXT: movq %r8, 24(%rdi)
+; SSE-NEXT: movq %rdx, 16(%rdi)
+; SSE-NEXT: movq %rsi, 8(%rdi)
+; SSE-NEXT: movq %r15, (%rdi)
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: lshr_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: andl $63, %ecx
+; AVX2-NEXT: shrl $3, %eax
+; AVX2-NEXT: andl $56, %eax
+; AVX2-NEXT: movq -112(%rsp,%rax), %rdx
+; AVX2-NEXT: movq -120(%rsp,%rax), %r9
+; AVX2-NEXT: movq %r9, %rsi
+; AVX2-NEXT: shrdq %cl, %rdx, %rsi
+; AVX2-NEXT: movq -104(%rsp,%rax), %r8
+; AVX2-NEXT: shrdq %cl, %r8, %rdx
+; AVX2-NEXT: movq -96(%rsp,%rax), %r10
+; AVX2-NEXT: shrdq %cl, %r10, %r8
+; AVX2-NEXT: movq -88(%rsp,%rax), %r11
+; AVX2-NEXT: shrdq %cl, %r11, %r10
+; AVX2-NEXT: movq -80(%rsp,%rax), %rbx
+; AVX2-NEXT: shrdq %cl, %rbx, %r11
+; AVX2-NEXT: movq -128(%rsp,%rax), %r14
+; AVX2-NEXT: movq -72(%rsp,%rax), %r15
+; AVX2-NEXT: shrdq %cl, %r15, %rbx
+; AVX2-NEXT: shrdq %cl, %r9, %r14
+; AVX2-NEXT: movq %rdi, %rax
+; AVX2-NEXT: shrxq %rcx, %r15, %rcx
+; AVX2-NEXT: movq %rcx, 56(%rdi)
+; AVX2-NEXT: movq %rbx, 48(%rdi)
+; AVX2-NEXT: movq %r11, 40(%rdi)
+; AVX2-NEXT: movq %r10, 32(%rdi)
+; AVX2-NEXT: movq %r8, 24(%rdi)
+; AVX2-NEXT: movq %rdx, 16(%rdi)
+; AVX2-NEXT: movq %rsi, 8(%rdi)
+; AVX2-NEXT: movq %r14, (%rdi)
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: lshr_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: pushq %r15
+; AVX512F-NEXT: pushq %r14
+; AVX512F-NEXT: pushq %rbx
+; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
+; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movl %eax, %ecx
+; AVX512F-NEXT: andl $63, %ecx
+; AVX512F-NEXT: shrl $3, %eax
+; AVX512F-NEXT: andl $56, %eax
+; AVX512F-NEXT: movq -112(%rsp,%rax), %rdx
+; AVX512F-NEXT: movq -120(%rsp,%rax), %r9
+; AVX512F-NEXT: movq %r9, %rsi
+; AVX512F-NEXT: shrdq %cl, %rdx, %rsi
+; AVX512F-NEXT: movq -104(%rsp,%rax), %r8
+; AVX512F-NEXT: shrdq %cl, %r8, %rdx
+; AVX512F-NEXT: movq -96(%rsp,%rax), %r10
+; AVX512F-NEXT: shrdq %cl, %r10, %r8
+; AVX512F-NEXT: movq -88(%rsp,%rax), %r11
+; AVX512F-NEXT: shrdq %cl, %r11, %r10
+; AVX512F-NEXT: movq -80(%rsp,%rax), %rbx
+; AVX512F-NEXT: shrdq %cl, %rbx, %r11
+; AVX512F-NEXT: movq -128(%rsp,%rax), %r14
+; AVX512F-NEXT: movq -72(%rsp,%rax), %r15
+; AVX512F-NEXT: shrdq %cl, %r15, %rbx
+; AVX512F-NEXT: shrdq %cl, %r9, %r14
+; AVX512F-NEXT: movq %rdi, %rax
+; AVX512F-NEXT: shrxq %rcx, %r15, %rcx
+; AVX512F-NEXT: movq %rcx, 56(%rdi)
+; AVX512F-NEXT: movq %rbx, 48(%rdi)
+; AVX512F-NEXT: movq %r11, 40(%rdi)
+; AVX512F-NEXT: movq %r10, 32(%rdi)
+; AVX512F-NEXT: movq %r8, 24(%rdi)
+; AVX512F-NEXT: movq %rdx, 16(%rdi)
+; AVX512F-NEXT: movq %rsi, 8(%rdi)
+; AVX512F-NEXT: movq %r14, (%rdi)
+; AVX512F-NEXT: popq %rbx
+; AVX512F-NEXT: popq %r14
+; AVX512F-NEXT: popq %r15
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: lshr_i512:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm0[3,4,5,6,7,0,1,2]
-; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm3
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
-; AVX512VL-NEXT: vpsrlq $63, %xmm4, %xmm4
-; AVX512VL-NEXT: vpaddq %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vpor %xmm4, %xmm2, %xmm2
-; AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512VL-NEXT: vpaddq %ymm3, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlq $63, %ymm1, %ymm1
-; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
-; AVX512VL-NEXT: vpsrlq $63, %zmm0, %zmm2
-; AVX512VL-NEXT: vpshufd {...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/171125
More information about the llvm-commits mailing list