<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/61117>61117</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AArch64][Codegen] Much worse code when IV is decremented in latch block
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
max-quazan
</td>
</tr>
</table>
<pre>
https://godbolt.org/z/bY17rG8P3
Code:
```
; Explicit data layout for this reproducer module.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-ni:1-p2:32:8:8:32-ni:2"
; test_default: counts %iv down from %start and, on each iteration, compares
; the i32 at %base[%iv - 1] against %x.
; Returns %iv - 1 for the first element equal to %x, or -1 once %iv reaches 0.
; In this variant %iv.next is computed in the loop header, before the exit test.
define i64 @test_default(i64 %start, ptr %base, i32 %x) {
entry:
br label %loop
loop:
%iv = phi i64 [%start, %entry], [%iv.next, %backedge]
; Decrement is placed in the header block, ahead of the zero check.
%iv.next = add nsw i64 %iv, -1
; The exit test uses the pre-decrement value %iv.
%done = icmp eq i64 %iv, 0
br i1 %done, label %not_found, label %backedge
backedge:
; Load element %iv.next of %base and compare it with %x.
%gep = getelementptr i32, ptr %base, i64 %iv.next
%val = load atomic i32, ptr %gep unordered, align 4
%loop.check = icmp eq i32 %val, %x
br i1 %loop.check, label %found, label %loop
not_found:
; %iv reached 0 without a match.
ret i64 -1
found:
; Index of the matching element.
ret i64 %iv.next
}
; test_increment_in_backedge: same loop as test_default (count %iv down from
; %start, compare the i32 at %base[%iv - 1] against %x, return %iv - 1 on a
; match or -1 once %iv reaches 0), except that %iv.next is computed in the
; backedge block instead of the loop header.
; NOTE: despite "increment" in the name, the add below uses -1, so the
; induction variable is decremented.
define i64 @test_increment_in_backedge(i64 %start, ptr %base, i32 %x) {
entry:
br label %loop
loop:
%iv = phi i64 [%start, %entry], [%iv.next, %backedge]
; Exit test on %iv; no decrement has happened yet in this variant.
%done = icmp eq i64 %iv, 0
br i1 %done, label %not_found, label %backedge
backedge:
; Decrement is placed here, after the exit test -- the only difference
; from test_default.
%iv.next = add nsw i64 %iv, -1
; Load element %iv.next of %base and compare it with %x.
%gep = getelementptr i32, ptr %base, i64 %iv.next
%val = load atomic i32, ptr %gep unordered, align 4
%loop.check = icmp eq i32 %val, %x
br i1 %loop.check, label %found, label %loop
not_found:
; %iv reached 0 without a match.
ret i64 -1
found:
; Index of the matching element.
ret i64 %iv.next
}
```
These two functions are semantically equivalent; the only difference is where `%iv.next` is computed. However, when it is computed in the backedge block (`test_increment_in_backedge`), `llc -mtriple=aarch64-none-linux-gnu` produces much worse assembly:
```
// Annotated llc output for test_default.
// x0 = %start on entry and the return value; x1 = %base; w2 = %x.
test_default: // @test_default
// x8 = base - 4, so [x8 + x0*4] addresses base[x0 - 1].
sub x8, x1, #4
.LBB0_1: // %loop
// x0 == 0: leave the loop through the not-found return.
cbz x0, .LBB0_4
ldr w9, [x8, x0, lsl #2]
// Decrement in place; x0 serves as both the IV and the result.
sub x0, x0, #1
cmp w9, w2
b.ne .LBB0_1
// Match: x0 already holds the decremented index.
ret
.LBB0_4:
// No match: return -1.
mov x0, #-1
ret
// Annotated llc output for test_increment_in_backedge.
// x0 = %start on entry and the return value; x1 = %base; w2 = %x.
test_increment_in_backedge: // @test_increment_in_backedge
// x9 = base - 4, so [x9 + x0*4] addresses base[x0 - 1].
sub x9, x1, #4
.LBB1_1: // %loop
// x0 == 0: leave the loop through the not-found return.
cbz x0, .LBB1_4
ldr w10, [x9, x0, lsl #2]
// The decrement goes to a separate register here instead of x0.
sub x8, x0, #1
// Extra move #1: copy the decremented value back into x0.
mov x0, x8
cmp w10, w2
b.ne .LBB1_1
// Extra move #2: x0 already equals x8 from the copy above.
mov x0, x8
ret
.LBB1_4:
// No match: return -1.
mov x0, #-1
ret
```
The latter has two extra moves (`mov x0, x8` appears twice). I am not a codegen expert (and specifically not for AArch64), but this looks like something that should be easy to avoid.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzUV12v4jYT_jXmZhSU2CTABRecRbzvSq1UqVWlXh058UDc49hZ2-Fjf31lJ0DgfFRbbdU2OieR7ZlnPM8zGRPunNxrxBXJn0i-mfDO18auGn5KvnT8K9eT0ojzqva-dYStCd0Sut0bURrlp8buCd1-JXRb_pbN7f8WPzGSbki67u-fjMDg008V6fAXh57bPXoQ3HPFz6bzQNgGCKWYNIStMZELwtbhn9FEZgVh63gLo2JG2LqYJTKjwSCji0Qz2s_9HEcyTCctjQ4DzoAV1yihdLxTgTupEWQxAzJLPTr_LHDHO-UJXcRZmjvPrSf0E7TehnHJHYahZDQMT4QugcyfekDU3p6vuQOUFhQvUQVLZUw7Dh7HN1NCc3mIbLS17LeUP43jE5r38PkmDuOqPEw1ni7rJa9eUOwxmIxho03E5kKAdkcYkpOH4JlkI2thNEZTWTUt4Jd703SUmcwu9mHlmqc2_nlnOi3uZq9bGzFwnbuyQGi-xzaG36NHhQ1qH4iXjL6lwWVrPQm3JA5cRRBluADuTSOrB4gQptPGCrQYd8qV3GuYjUCCQNOqxurlno9e-ANXA-unwedKyc3xjoLXpDyWxI26W11Y9DHPi0j9_T2rRzbIfPMnBS91ZSPNz1I_XxX5LuX_r6j-f7Sev_31---W_99T_9-r_O-PoV9qdAj-aGDX6cpLox1wi5ClKaE54JdOHrhCHUvL1whGqzMIuduhRV0hSAfHGi1CgLwFLdKwUpmm7TyKKfzfHPGANqDsjAVnGgSL3Bkd9fOgkAsH3kDTVTUcjXUI3DlsSnUOETTYTmup9yGQUhUkjbeyVUjYhnNb1cUs0UZjoqTuTsled2FD7x2-4wOOhW4bTvVXR9_AaX-5rozP0yLs-JT1orOhUKY_PD2lz9kYbKzqBaQqv_YgaXDvnS6lpoSNa8fl8F4PkaKpcqFOGB290g_bSm_GhLLsIW7TDrhHer9STjWG5yWBu0WLfpze7FZojTmMwhLKkvdcP-it7GodrwcV3vZ5O_nle5pkf0WT7KrJcAVpjll60WX5LbosPtIl0Dgot3itWKyGPuxHsmWPsr2PeidoNhZ05Pq2rlfX1x0EFPceLdTcAQU8ecsDELopfAbegDYeOFRG4B414KlF64HQBdcCXIuV3MmKK3WOhlLD8D4TGokuOx8bhDEvDpR8wdg9fB16ga-5B1ebTgkoEZC7c-gh_GCkmE7EioklW_IJrrJiPp8XaVbQSb1K2azIqyzL2bLKxJwVKWc7nLMdnc0WRc4nckVTylKW0ozSJaXTWSXmix0tqmzOOHJKZik2XKqpUocmfAZMpHMdroosy-aT2NZd_KKgVOMR4mL4yZ1vJnYVfJKy2zsyS5V03t1QvPQqfoqs1z0F-YbkT5965ki-gR9v3THw2XfGz7-GZitweF9QBBIV91UNpTLVy6Sz6vELRvq6K6eVaQjdhvDDI2mt-R0rT-g2btoRuo1J_REAAP__hzir0g">