[llvm] [X86] Add test coverage for #164853 (PR #165245)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 27 05:44:15 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Show examples where the truncation of a load shifted right by a non-constant, but known-aligned, amount can be folded into the load's address math to avoid loads/spills of large/illegal scalar integers.
---
Patch is 73.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165245.diff
1 Files Affected:
- (added) llvm/test/CodeGen/X86/trunc-srl-load.ll (+1672)
``````````diff
diff --git a/llvm/test/CodeGen/X86/trunc-srl-load.ll b/llvm/test/CodeGen/X86/trunc-srl-load.ll
new file mode 100644
index 0000000000000..4dae1433b2196
--- /dev/null
+++ b/llvm/test/CodeGen/X86/trunc-srl-load.ll
@@ -0,0 +1,1672 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
+
+; Tests for the analysis of non-constant shift amounts to improve load address math
+
+; Alignment of shift amounts should allow sub-integer loads.
+
+define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub64_16:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl 4(%eax), %esi
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: andb $16, %cl
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: shrl %cl, %eax
+; X86-NEXT: shrdl %cl, %esi, %edx
+; X86-NEXT: testb $32, %ch
+; X86-NEXT: jne .LBB0_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: .LBB0_2:
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub64_16:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: andb $48, %cl
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shrq %cl, %rax
+; SSE-NEXT: # kill: def $ax killed $ax killed $rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extractSub64_16:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX-NEXT: andb $48, %sil
+; AVX-NEXT: shrxq %rsi, (%rdi), %rax
+; AVX-NEXT: # kill: def $ax killed $ax killed $rax
+; AVX-NEXT: retq
+ %idx_bounds = and i32 %idx, 63
+ %idx_align = and i32 %idx_bounds, -16
+ %sh = zext nneg i32 %idx_align to i64
+ %ld = load i64, ptr %word, align 8
+ %sub = lshr i64 %ld, %sh
+ %res = trunc i64 %sub to i16
+ ret i16 %res
+}
+
+define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_16:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: movzbl 12(%ebp), %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl 4(%ecx), %esi
+; X86-NEXT: movl 8(%ecx), %edi
+; X86-NEXT: movl 12(%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: andb $16, %cl
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: movzbl %al, %edx
+; X86-NEXT: movl (%esp,%edx), %eax
+; X86-NEXT: movl 4(%esp,%edx), %edx
+; X86-NEXT: shrdl %cl, %edx, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub128_16:
+; SSE: # %bb.0:
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rdx
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andb $48, %cl
+; SSE-NEXT: movq %rdx, %rdi
+; SSE-NEXT: shrq %cl, %rdi
+; SSE-NEXT: shrdq %cl, %rdx, %rax
+; SSE-NEXT: testb $64, %sil
+; SSE-NEXT: cmovneq %rdi, %rax
+; SSE-NEXT: # kill: def $ax killed $ax killed $rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extractSub128_16:
+; AVX: # %bb.0:
+; AVX-NEXT: movq (%rdi), %rdx
+; AVX-NEXT: movq 8(%rdi), %rax
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: andb $48, %cl
+; AVX-NEXT: shrdq %cl, %rax, %rdx
+; AVX-NEXT: shrxq %rcx, %rax, %rax
+; AVX-NEXT: testb $64, %sil
+; AVX-NEXT: cmoveq %rdx, %rax
+; AVX-NEXT: # kill: def $ax killed $ax killed $rax
+; AVX-NEXT: retq
+ %idx_bounds = and i32 %idx, 127
+ %idx_align = and i32 %idx_bounds, -16
+ %sh = zext nneg i32 %idx_align to i128
+ %ld = load i128, ptr %word, align 8
+ %sub = lshr i128 %ld, %sh
+ %res = trunc i128 %sub to i16
+ ret i16 %res
+}
+
+define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: movzbl 12(%ebp), %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl 4(%ecx), %esi
+; X86-NEXT: movl 8(%ecx), %edi
+; X86-NEXT: movl 12(%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: andb $96, %al
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: movl (%esp,%eax), %eax
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub128_32:
+; SSE: # %bb.0:
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rdx
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andb $32, %cl
+; SSE-NEXT: movq %rdx, %rdi
+; SSE-NEXT: shrq %cl, %rdi
+; SSE-NEXT: shrdq %cl, %rdx, %rax
+; SSE-NEXT: testb $64, %sil
+; SSE-NEXT: cmovneq %rdi, %rax
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extractSub128_32:
+; AVX: # %bb.0:
+; AVX-NEXT: movq (%rdi), %rdx
+; AVX-NEXT: movq 8(%rdi), %rax
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: andb $32, %cl
+; AVX-NEXT: shrdq %cl, %rax, %rdx
+; AVX-NEXT: shrxq %rcx, %rax, %rax
+; AVX-NEXT: testb $64, %sil
+; AVX-NEXT: cmoveq %rdx, %rax
+; AVX-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX-NEXT: retq
+ %idx_bounds = and i32 %idx, 127
+ %idx_align = and i32 %idx_bounds, -32
+ %sh = zext nneg i32 %idx_align to i128
+ %ld = load i128, ptr %word, align 8
+ %sub = lshr i128 %ld, %sh
+ %res = trunc i128 %sub to i32
+ ret i32 %res
+}
+
+define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: movzbl 12(%ebp), %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl 4(%ecx), %esi
+; X86-NEXT: movl 8(%ecx), %edi
+; X86-NEXT: movl 12(%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: andb $64, %al
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: movzbl %al, %ecx
+; X86-NEXT: movl (%esp,%ecx), %eax
+; X86-NEXT: movl 4(%esp,%ecx), %edx
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: extractSub128_64:
+; X64: # %bb.0:
+; X64-NEXT: testb $64, %sil
+; X64-NEXT: je .LBB3_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: movq 8(%rdi), %rax
+; X64-NEXT: retq
+; X64-NEXT: .LBB3_1:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: retq
+ %idx_bounds = and i32 %idx, 127
+ %idx_align = and i32 %idx_bounds, -64
+ %sh = zext nneg i32 %idx_align to i128
+ %ld = load i128, ptr %word, align 8
+ %sub = lshr i128 %ld, %sh
+ %res = trunc i128 %sub to i64
+ ret i64 %res
+}
+
+define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_8:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $192, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%eax), %ebx
+; X86-NEXT: movl 44(%eax), %edi
+; X86-NEXT: movl 48(%eax), %esi
+; X86-NEXT: movl 52(%eax), %edx
+; X86-NEXT: movl 56(%eax), %ecx
+; X86-NEXT: movl 60(%eax), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: andl $24, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: andl $60, %edx
+; X86-NEXT: movl 48(%esp,%edx), %eax
+; X86-NEXT: movl 52(%esp,%edx), %edx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shrdl %cl, %edx, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: extractSub512_8:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE-NEXT: movups (%rdi), %xmm0
+; SSE-NEXT: movups 16(%rdi), %xmm1
+; SSE-NEXT: movups 32(%rdi), %xmm2
+; SSE-NEXT: movups 48(%rdi), %xmm3
+; SSE-NEXT: xorps %xmm4, %xmm4
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andl $56, %ecx
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $56, %esi
+; SSE-NEXT: movq -128(%rsp,%rsi), %rdx
+; SSE-NEXT: shrq %cl, %rdx
+; SSE-NEXT: movl -120(%rsp,%rsi), %eax
+; SSE-NEXT: addl %eax, %eax
+; SSE-NEXT: notl %ecx
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: orl %edx, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $rax
+; SSE-NEXT: popq %rcx
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: extractSub512_8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rax
+; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT: vmovups (%rdi), %ymm0
+; AVX2-NEXT: vmovups 32(%rdi), %ymm1
+; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: andl $56, %ecx
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: andl $56, %esi
+; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax
+; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
+; AVX2-NEXT: notl %ecx
+; AVX2-NEXT: movl -120(%rsp,%rsi), %edx
+; AVX2-NEXT: addl %edx, %edx
+; AVX2-NEXT: shlxq %rcx, %rdx, %rcx
+; AVX2-NEXT: orl %ecx, %eax
+; AVX2-NEXT: # kill: def $al killed $al killed $rax
+; AVX2-NEXT: popq %rcx
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: extractSub512_8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: vmovups 32(%rdi), %ymm1
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: andl $56, %ecx
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: andl $56, %esi
+; AVX512-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax
+; AVX512-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
+; AVX512-NEXT: notl %ecx
+; AVX512-NEXT: movl -120(%rsp,%rsi), %edx
+; AVX512-NEXT: addl %edx, %edx
+; AVX512-NEXT: shlxq %rcx, %rdx, %rcx
+; AVX512-NEXT: orl %ecx, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $rax
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %idx_bounds = and i32 %idx, 511
+ %idx_align = and i32 %idx_bounds, -8
+ %ld = load i512, ptr %word, align 8
+ %sh = zext nneg i32 %idx_align to i512
+ %sub = lshr i512 %ld, %sh
+ %res = trunc i512 %sub to i8
+ ret i8 %res
+}
+
+define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $192, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%eax), %ebx
+; X86-NEXT: movl 44(%eax), %edi
+; X86-NEXT: movl 48(%eax), %esi
+; X86-NEXT: movl 52(%eax), %edx
+; X86-NEXT: movl 56(%eax), %ecx
+; X86-NEXT: movl 60(%eax), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: andl $56, %ecx
+; X86-NEXT: movl 48(%esp,%ecx), %eax
+; X86-NEXT: movl 52(%esp,%ecx), %edx
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: ex...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/165245
More information about the llvm-commits mailing list