<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/125354>125354</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Missed CSE opportunity in `(base + align - 1) & -align`
</td>
</tr>
<tr>
<th>Labels</th>
<td>
missed-optimization
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Kmeakin
</td>
</tr>
</table>
<pre>
The code produced for `__builtin_align_up(base, align)` is `(base + align - 1) & -align`.
However, since `-align` is equivalent to `~(align - 1)`, we can reassociate the `+` to get `(base + align_minus_1) & ~align_minus_1`.
This saves one instruction on targets where and-not can be done in a single instruction (e.g. AArch64, x86 with BMI, RISC-V with Zbb).
# C++
https://godbolt.org/z/oTP4W9ah1
```c++
auto src(uint64_t base, uint64_t align) -> uint64_t {
return (base + align - 1) & -align;
}
auto tgt(uint64_t base, uint64_t align) -> uint64_t {
auto mask = align - 1;
return (base + mask) & ~mask;
}
```
# LLVM-IR
https://alive2.llvm.org/ce/z/afXMqW
```llvm
define i64 @src(i64 noundef %base, i64 noundef %align) {
%4 = add i64 %base, -1
%5 = add i64 %4, %align
%6 = sub i64 0, %align
%7 = and i64 %5, %6
ret i64 %7
}
define i64 @tgt(i64 noundef %base, i64 noundef %align) {
%mask = add i64 %align, -1
%sum = add i64 %base, %mask
%not_mask = xor i64 %mask, -1
%ret = and i64 %sum, %not_mask
ret i64 %ret
}
```
# Assembly
```asm
; AArch64:
src:
add x8, x0, x1
neg x9, x1
sub x8, x8, #1
and x0, x8, x9
ret
tgt:
sub x8, x1, #1
add x9, x0, x8
bic x0, x9, x8
ret
; x86-64 + bmi:
src:
lea rax, [rdi + rsi]
dec rax
neg rsi
and rax, rsi
ret
tgt:
dec rsi
lea rdi, [rdi + rsi]
andn rax, rsi, rdi
ret
; riscv + zbb:
src:
add a0, a0, a1
addi a0, a0, -1
neg a1, a1
and a0, a0, a1
ret
tgt:
addi t1, a1, -1
add t2, a0, t1
andn a0, t2, t1
ret
```
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJykVk2P2zYQ_TX0ZeCFRH1YPuhg78Zo0CxQtEHTm0GJY4uNRDok5XVyyG8vSH1YVrzZohUMezV8fDPvzaxEZow4SsScJFuSPC1Yayul818bZJ-FXBSKf80_Vgil4ggnrXhbIoeD0kDSYL8vWlFbIfesFke5b0-EZgUzSOgj-BCha5IGIIyD94tA6LZbhSWEhK6B0BSWHT4NHkiw-UW94Bm1ozFCluh2jwDHhl9acWY1SgtWudXvhGZTTp_uEV4QSiZBIzNGlcwi2Aq7WraOyio4or1b3L4RsjX7scDvt9Gu0I-VMGDYGQ0oiSCksbotrVASlATL9BGtgZcKNQKTfCmV9QUVCLzbAMxJPNa3mwnN8AibjS6rNHZCLlkKL8JWsH1-7-61MOW5i3wrCic42LgPjeDRaaPbLlBZezIk2hC6I3R3VLxQtX1Q-kjo7huhO_Xxt_jTmlWhg6dB9ylHBtZaBUaXhGatkDaN9xaGBo-BodOwJNG7a5isHAMAgEbbai_qzf5Hvu7VU1e9T2-P9v-l9ywNM5-BRE-TzNFP6nPwsfP-ZlrZYNTV9A8f_nxevv_9numsFmekD3V9bnrjS-zdZ4e_nr98mjI6FAk2HA_CjUcaA4mDrgHuRqpWcjwAocngwyw8ujHoJzSJO-Gcd4TXvctwgCRziJ-6ka9HpR5l2sKjgnuQVUckR6KkR6UeodEOC6tpp28Fdy3_74Kvzb4K6nFT0aZtXnOmJxmQUtn9SHpReoB3YzLldAJnDpi26SkHmrkVGu3PhmtjDDZF_bWPDABm3KiQaDs-KCK37qbF_wH95eS565L5J4lv2yWcACQe4bL-Iez6PNmXdSKiKcSp9JDgCrm4ZxH0koKN6-VNOTPa8B7tUPF6UnHmAIUoX0nYX2Ne58slS5fe4S0UjXjFnhoZaHbxZSRbzYXHayNI8jSBcewyO-itdT5qxMyWnvJ24XVXRvobvKvNR7l4qz4muRzq6_O6H343vTOne4U4LvcGmXkzdIB5n_vvWYvEHLCcD5UHhD_ulW-Qv27TkNYOrPO0Q-GWXrlteMenccl908nIDv9eC55HfB2t2QLzcBVlcbQK0mhR5VkUc3agAcaHLExDSld8nZVxmRVhWaQHuhA5DWgS0CAM12EYRA9JzLOYJsUqpRlPD0jiABsm6vGlsBDGtJiHNImSeFGzAmvjj2SUNsIY5Et1sqIR35g7HhBK3VlN5277smiPhsRBLYw1V0IrbI35s98Mj3-8A3U6KW1bKexXd-z41-exRavrfHaMELZqi4dSNYTu_Bur-1metPobS0vozusxhO56Seec_hMAAP__qibW1g">