<!doctype html>
<html lang="en">
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Failure to leverage existing memory location when spilling"
href="https://bugs.llvm.org/show_bug.cgi?id=40534">40534</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Failure to leverage existing memory location when spilling
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Register Allocator
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>listmail@philipreames.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org, quentin.colombet@gmail.com
</td>
</tr></table>
<div>
<pre>Filing this under register allocator, but really not sure that's the
appropriate place to fix it. The basic problem - illustrated by the test below
- is that we can have a value live in a loop which changes on every iteration,
but already has a memory location available w/said contents. Instead of
inserting new spill slots, it would be ideal to reuse the existing locations.
This particular example was motivated by our statepoint lowering - which
unfortunately, appears to create such idioms - but the problem is visible in
arbitrary IR as well. For statepoints, doing this at the MI layer would be
preferred, but I see a lot of argument for this being a missed IR level
PRE-like store transform as well.
declare void @clobber()
declare void @use(...)
define void @test(i64* %p, i64 %cnt) {
entry:
%p1 = getelementptr i64, i64* %p, i64 1
%p2 = getelementptr i64, i64* %p, i64 2
%p3 = getelementptr i64, i64* %p, i64 3
%p4 = getelementptr i64, i64* %p, i64 4
%p5 = getelementptr i64, i64* %p, i64 5
%p6 = getelementptr i64, i64* %p, i64 6
%p7 = getelementptr i64, i64* %p, i64 7
%p8 = getelementptr i64, i64* %p, i64 8
%p9 = getelementptr i64, i64* %p, i64 9
%p10 = getelementptr i64, i64* %p, i64 10
%p11 = getelementptr i64, i64* %p, i64 11
%p12 = getelementptr i64, i64* %p, i64 12
%p13 = getelementptr i64, i64* %p, i64 13
%p14 = getelementptr i64, i64* %p, i64 14
store i64 0, i64* %p
store i64 0, i64* %p1
store i64 0, i64* %p2
store i64 0, i64* %p3
store i64 0, i64* %p4
store i64 0, i64* %p5
store i64 0, i64* %p6
store i64 0, i64* %p7
store i64 0, i64* %p8
store i64 0, i64* %p9
store i64 0, i64* %p10
store i64 0, i64* %p11
store i64 0, i64* %p12
store i64 0, i64* %p13
store i64 0, i64* %p14
br label %loop
loop:
%iv = phi i64 [0, %entry], [%iv.next, %loop]
%v1 = phi i64 [0, %entry], [%v2, %loop]
%v1p1 = phi i64 [0, %entry], [%v2p1, %loop]
%v1p2 = phi i64 [0, %entry], [%v2p2, %loop]
%v1p3 = phi i64 [0, %entry], [%v2p3, %loop]
%v1p4 = phi i64 [0, %entry], [%v2p4, %loop]
%v1p5 = phi i64 [0, %entry], [%v2p5, %loop]
%v1p6 = phi i64 [0, %entry], [%v2p6, %loop]
%v1p7 = phi i64 [0, %entry], [%v2p7, %loop]
%v1p8 = phi i64 [0, %entry], [%v2p8, %loop]
%v1p9 = phi i64 [0, %entry], [%v2p9, %loop]
%v1p10 = phi i64 [0, %entry], [%v2p10, %loop]
%v1p11 = phi i64 [0, %entry], [%v2p11, %loop]
%v1p12 = phi i64 [0, %entry], [%v2p12, %loop]
%v1p13 = phi i64 [0, %entry], [%v2p13, %loop]
%v1p14 = phi i64 [0, %entry], [%v2p14, %loop]
store i64 %v1, i64* %p
store i64 %v1p1, i64* %p1
store i64 %v1p2, i64* %p2
store i64 %v1p3, i64* %p3
store i64 %v1p4, i64* %p4
store i64 %v1p5, i64* %p5
store i64 %v1p6, i64* %p6
store i64 %v1p7, i64* %p7
store i64 %v1p8, i64* %p8
store i64 %v1p9, i64* %p9
store i64 %v1p9, i64* %p10
store i64 %v1p10, i64* %p11
store i64 %v1p12, i64* %p12
store i64 %v1p13, i64* %p13
store i64 %v1p14, i64* %p14
call void @clobber()
%v2 = load i64, i64* %p
%v2p1 = load i64, i64* %p1
%v2p2 = load i64, i64* %p2
%v2p3 = load i64, i64* %p3
%v2p4 = load i64, i64* %p4
%v2p5 = load i64, i64* %p5
%v2p6 = load i64, i64* %p6
%v2p7 = load i64, i64* %p7
%v2p8 = load i64, i64* %p8
%v2p9 = load i64, i64* %p9
%v2p10 = load i64, i64* %p10
%v2p11 = load i64, i64* %p11
%v2p12 = load i64, i64* %p12
%v2p13 = load i64, i64* %p13
%v2p14 = load i64, i64* %p14
%iv.next = add i64 %iv, 1
%exit.cmp = icmp sgt i64 %iv, 200
br i1 %exit.cmp, label %exit, label %loop
exit:
call void (...) @use(i64 %v2)
ret void
}
$ ../build/bin/opt -O1 -S loop-stld.ll | ../build/bin/llc -O3
.text
.file "loop-stld.ll"
.globl test # -- Begin function test
.p2align 4, 0x90
.type test,@function
test: # @test
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
pushq %r15
.cfi_def_cfa_offset 24
pushq %r14
.cfi_def_cfa_offset 32
pushq %r13
.cfi_def_cfa_offset 40
pushq %r12
.cfi_def_cfa_offset 48
pushq %rbx
.cfi_def_cfa_offset 56
subq $24, %rsp
.cfi_def_cfa_offset 80
.cfi_offset %rbx, -56
.cfi_offset %r12, -48
.cfi_offset %r13, -40
.cfi_offset %r14, -32
.cfi_offset %r15, -24
.cfi_offset %rbp, -16
movq %rdi, %rbx
movq $0, 112(%rdi)
movq $0, 104(%rdi)
movq $0, 96(%rdi)
movq $0, 88(%rdi)
movq $0, 80(%rdi)
movq $0, 72(%rdi)
movq $0, 64(%rdi)
movq $0, 56(%rdi)
movq $0, 48(%rdi)
movq $0, 40(%rdi)
movq $0, 32(%rdi)
movq $0, 24(%rdi)
movq $0, 16(%rdi)
movq $0, 8(%rdi)
movq $0, (%rdi)
movq $-1, %r14
xorl %eax, %eax
movq %rax, 8(%rsp) # 8-byte Spill
xorl %edi, %edi
xorl %esi, %esi
xorl %ecx, %ecx
xorl %edx, %edx
xorl %ebp, %ebp
xorl %r12d, %r12d
xorl %r13d, %r13d
xorl %r10d, %r10d
xorl %eax, %eax
xorl %r11d, %r11d
xorl %r15d, %r15d
xorl %r9d, %r9d
xorl %r8d, %r8d
.p2align 4, 0x90
.LBB0_1: # %loop
# =>This Inner Loop Header: Depth=1
movq %rbp, 16(%rsp) # 8-byte Spill
movq %rdi, %rbp
movq 8(%rsp), %rdi # 8-byte Reload <-- PROBLEM
movq %rdi, (%rbx)
movq %rbp, 8(%rbx)
movq %rsi, 16(%rbx)
movq %rcx, 24(%rbx)
movq %rdx, 32(%rbx)
movq 16(%rsp), %rcx # 8-byte Reload <-- PROBLEM
movq %rcx, 40(%rbx)
movq %r12, 48(%rbx)
movq %r13, 56(%rbx)
movq %r10, 64(%rbx)
movq %rax, 72(%rbx)
movq %rax, 80(%rbx)
movq %r11, 88(%rbx)
movq %r15, 96(%rbx)
movq %r9, 104(%rbx)
movq %r8, 112(%rbx)
callq clobber
movq (%rbx), %rax
movq %rax, 8(%rsp) # 8-byte Spill
movq 8(%rbx), %rdi
movq 16(%rbx), %rsi
movq 24(%rbx), %rcx
movq 32(%rbx), %rdx
movq 40(%rbx), %rbp
movq 48(%rbx), %r12
movq 56(%rbx), %r13
movq 64(%rbx), %r10
movq 72(%rbx), %rax
movq 80(%rbx), %r11
movq 96(%rbx), %r15
movq 104(%rbx), %r9
movq 112(%rbx), %r8
incq %r14
cmpq $201, %r14
jb .LBB0_1
# %bb.2: # %exit
movq 8(%rsp), %rdi # 8-byte Reload
xorl %eax, %eax
addq $24, %rsp
.cfi_def_cfa_offset 56
popq %rbx
.cfi_def_cfa_offset 48
popq %r12
.cfi_def_cfa_offset 40
popq %r13
.cfi_def_cfa_offset 32
popq %r14
.cfi_def_cfa_offset 24
popq %r15
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
jmp use # TAILCALL
.Lfunc_end0:
.size test, .Lfunc_end0-test
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits</pre>
</div>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>