<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/99391>99391</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Missed Optimization: saturating byte addition
</td>
</tr>
<tr>
<th>Labels</th>
<td>
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
bmaurer
</td>
</tr>
</table>
<pre>
The folly f14 hashtable has a 8 bit counter that "saturates" at 256 bytes.
https://github.com/facebook/folly/blob/c8b8d4cac3b7cf049b007fa08e12061c5b239a5e/folly/container/detail/F14Table.h#L572-L582
```
void incrOutboundOverflowCount() {
if (outboundOverflowCount_ != 255) {
++outboundOverflowCount_;
}
}
void decrOutboundOverflowCount() {
if (outboundOverflowCount_ != 255) {
--outboundOverflowCount_;
}
}
```
The optimal code for the incr case is:
```
movzx eax, BYTE PTR [rdi]
add al, 1
sbb al, 0
mov BYTE PTR [rdi], al
```
```
void add1(uint8_t* x) {
*x = *x == 255 ? 255 : *x + 1;
}
void add2(uint8_t* x) {
uint8_t res;
uint8_t carry = __builtin_add_overflow(*x, 1, &res) ? 1 : 0;
*x = res - carry;
}
# clang output
add1(unsigned char*): # @add1(unsigned char*)
movzx eax, byte ptr [rdi]
inc al
movzx eax, al
mov ecx, 255
cmovne ecx, eax
mov byte ptr [rdi], cl
ret
add2(unsigned char*): # @add2(unsigned char*)
movzx eax, byte ptr [rdi]
inc al
sete cl
sub al, cl
mov byte ptr [rdi], al
ret
```
Neither clang nor gcc does this for add1. GCC can do it for add2 but clang cannot. And I'm not sure what the optimal output is for the subtraction case but all of the compilers end up using multiple registers.
```
void sub1(uint8_t* x) {
*x = *x == 255 ? 255 : *x - 1;
}
void sub2(uint8_t* x) {
uint8_t xp1 = *x + 1;
uint8_t res;
uint8_t carry = __builtin_sub_overflow(xp1, 1, &res) ? 1 : 0;
*x = res - 1 + carry;
}
#gcc
sub1(unsigned char*):
movzx eax, BYTE PTR [rdi]
cmp al, -1
setne dl
sub eax, edx
mov BYTE PTR [rdi], al
ret
sub2(unsigned char*):
movzx eax, BYTE PTR [rdi]
inc al
sete dl
lea eax, [rax-2+rdx]
mov BYTE PTR [rdi], al
ret
# clang
sub1(unsigned char*): # @sub1(unsigned char*)
movzx eax, byte ptr [rdi]
lea ecx, [rax - 1]
cmp al, -1
movzx eax, cl
mov ecx, 255
cmovne ecx, eax
mov byte ptr [rdi], cl
ret
sub2(unsigned char*): # @sub2(unsigned char*)
movzx eax, byte ptr [rdi]
cmp al, -1
sete cl
add cl, al
dec cl
mov byte ptr [rdi], cl
ret
```
Full repro: https://godbolt.org/z/PqP7vcjsz
```
#include <cstdint>
#include <cassert>
#include <iostream>
void add1(uint8_t* x) {
*x = *x == 255 ? 255 : *x + 1;
}
void add2(uint8_t* x) {
uint8_t res;
uint8_t carry = __builtin_add_overflow(*x, 1, &res) ? 1 : 0;
*x = res - carry;
}
void sub1(uint8_t* x) {
*x = *x == 255 ? 255 : *x - 1;
}
void sub2(uint8_t* x) {
uint8_t xp1 = *x + 1;
uint8_t res;
uint8_t carry = __builtin_sub_overflow(xp1, 1, &res) ? 1 : 0;
*x = res - 1 + carry;
}
int main() {
for (int i = 0; i < 256; i ++) {
uint8_t a = i, b = i, c = i, d = i;
add1(&a);
add2(&b);
sub1(&c);
sub2(&d);
assert(a == b);
assert(c == d);
std::cout << (int)a << " " << (int)c << std::endl;
}
return 0;
}
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzUWN2O4joSfhpzUwIlFcLPBRdAT1Yrze6MVn2zVy3_AZ5jYo7t9ND99Ec2CQQIdJ-eGelM1E0cl11Vrvr8VRzqnFqXUs5IviD5Q49WfmPsjG1pZaXtMSNeZo8bCSuj9Qus0iFsqNt4yrQMLaAwAaY8cFOVXlrwG-qBIDrqK0u9dAQRqAfMR8BevHQDIMkDSeaH3433O0eyOcGCYLFWflOxATdbgsWKcsmM-SM0g3GCBdOGESz4hE3EkFOesTFfJcMpS5LxiiYTmWIySnnOMJvSXLZmclN6qkppCRZCeqo0waJIh49hIYMNwexzPsb-53yCbffIKKn_4iM8GyVAldx-qTwzVSm-PEu70ub7Miyf4ITgFMh4UQ8HALUCghPTNfwJCKYkewDM86t5AAQXBBfdM0nWGkrGD83Dsdl0RIeF_NUO9_sf9fM8wIffgDez82pLNXAjAvgCsGSMPHDqJKgImjuZqq-teX7dA4Cke4JLWPz_8RN8ffwfkHxhhSJ54xEVIo6nOgxLz5U4xlrCawvx3qEal2HK7XVedMZcUSFSgpNKlX7y5AnOYX-VIoLzPYQ8NI06J0Cyor7PaxkuID0m4QIdjT18y14tBCvdKaFNJ6fWvkRvnp5YpbRX5RMV4snUQIgYm-8PYcUlEBwFPcFGVkAafU3OcdIsz0oH_YOBW2sgmAHXtFyDqfyu8mfk0sSyjBQngG-oJTgnOD0EKAMyTO4MuoekwGWw8_YKSfWlSt5g5p6aDjFIHkVhk53J-NY8lxIaedDQicQO13AJ_MKSlf4YJbwdpbvXKYS3NPxoCC_j46SXMRaX_VV7i15K70fm0sYxMp2b9r9S-Y20NepKY2HNOQgjHfiNcpGrAqYG8K_lEjgtQRhQvulHYJWvJ3NalsYPYF4K-DfB8RZK48FVVsL3UEZ9iwdreNcGgsRVzFvKvTLlgRODYqo1mFWUc7PdKS2tA1kKqHZQOVWuYVtpr3ZagpVr5by0bnCHk1pM4Sr2E5mp_wYvuYrd56WGf_a7tGXxjO9uclerv4u-XMXa9LXfpR9nrzT69BaHBQgdHpood-7Gj9W2I33sWlukf1njpA_cAqJjY9UGpLhBOPdL32k_NTn9iWu7RbMNT1wuR0sa77XeoI7u-0hwYcX-pPadK7sijKYatdN7N6Pv4tc7GjqC9k52PUaCtyIRt2UXcLpBc5GlW7R7VdB-UiV7H7DeG-JfUcJu7LlbVax5C-W6C2ZCcjib9cGC30nxRaU1WLmzJoTs4lhmBDPaD4xdEyxeCRZf__w6fubf3Ou90oGZKrmuhASSLbnzQpWeZJ86pdQ5aW9JlXHeSro9iX_D9-U3as4PvDL_nfflf2Ilb0Xm9y_mqvSwpaq8OlqH1zaCkyBXUWWwFJuBGUf1Qzzzd3wMOK2Rxskqss-pyU9NUTezxRW5pNGrEY3EeC3Gg5h1iWvMEBzxM3GdXYIj0e6_0H3Y3TihDZI6bRyH8WaYOLflReCkbM5NOOllyxC7Q0wJTumpBw__FwN403PUI0uhb32iiIRZ2bKFiOuvFj0xy8Q0m9KenKVjTDFPJ2PsbWZJzkc84dPRiq8yKceUjzmfjphM8mRIp6ynZpjgMBmHWVmaJgMcregwScbDVTaa8CQnw0RuqdIDrZ-3gXp7yrlKzqbTbJr2NGVSu-ajnZ2FQX1WrR0ZJlo5707TvPJazv6jnJMCvoTjhHql4dQQ4F5_pgsHg1hFqBAqyHqV1bM7H-eC9vrW31nzTXJPsIgeOoLFwcnnGf4VAAD__xa-a1k">