<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/57640">57640</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
several missed optimizations in C++ using span and optional
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
alex
</td>
</tr>
</table>
<pre>
Consider the following code:
```c++
#include <optional>
#include <span>
#include <cstdint>
class Parser {
std::span<uint8_t> data;
public:
std::optional<uint8_t> read_u8() {
if (data.empty()) {
return std::nullopt;
}
uint8_t b = data[0];
data = data.subspan(1);
return b;
}
};
std::optional<uint32_t> f(Parser p) {
if (auto a = p.read_u8()) {
if (auto b = p.read_u8()) {
if (auto c = p.read_u8()) {
if (auto d = p.read_u8()) {
return (uint32_t)a.value() << 24 | (uint32_t)b.value() << 16 | (uint32_t)c.value() << 8 | (uint32_t)d.value();
}
}
}
}
return std::nullopt;
}
```
Ideally, this could be compiled to a single length check followed by a `mov` and a `bswap`.
Unfortunately the emitted code has several missed optimizations: https://godbolt.org/z/zf53fcq11
```asm
f(Parser): # @f(Parser)
test rsi, rsi
je .LBB0_1
dec rsi
cmp rsi, 3
jae .LBB0_3
.LBB0_1:
xor eax, eax
xor ecx, ecx
or rax, rcx
ret
.LBB0_3:
movzx eax, byte ptr [rdi]
movzx ecx, byte ptr [rdi + 1]
shl rcx, 16
movzx edx, byte ptr [rdi + 2]
shl rdx, 8
movzx esi, byte ptr [rdi + 3]
shl rax, 24
or rax, rcx
or rax, rdx
or rax, rsi
movabs rcx, 4294967296
or rax, rcx
ret
```
1. There are two bounds checks: one for length 0, then a decrement followed by a comparison against 3. Together these are equivalent to a single comparison of the length against 4.
2. `LBB0_1` is bizarre: the `xor ecx, ecx` and `or rax, rcx` are entirely unnecessary and could be omitted.
3. The shifting and OR-ing could be replaced by a single `mov` and `bswap`. LLVM is able to perform this optimization under other circumstances.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJydVs93qygU_mvMhjMehWjiwsVLOl11zpnFzGx7EEnCGwQHsG36189FTKLW9L0-Y0T57i8ulw8qXZ_LvVZW1Nwgd-LooKXUr0IdEdM1j8i3KHmIksszT8LNIrzzd-jFRCgmu5qjiOx164RWVEbk9yXYtlTdgZh1tVDuhvZPJqm16E9qLEQYbQafCC4Q9_GRb8HmvgPl7bPXRzV1NCK7sZ22q6Rg1wFNDNxiHhsxnNbP3TbCcBdT1_4SBwSQ9xTzpnXnILco6i_DXWfUzafqINGtu0Z5kYs2D9OOISJUQY4ewsiyXRJlDx9UPXYVim1X9YnB29THNRce4qkmwNW5f5nm716yCA7ZOoCjYZbaD0kIuaKd0yhE2MaT7H6S4F6p-mmlmSL7kuJMuf6y8iixIH1NDy5o_EJlxy-1RPZwI7wGS_uZZLUkmeYLkmxJcrsgWI8FPxTC3cJb7Jx0TD5-VOC32rqwyLi8gIColGfgIGERo1CYHBioaYXkNerLRnJ1dCfETpz9O9AUQNXZV1SeNPoFnoiq2n9V9pW20MZjF3-rgzauU9Tx3hFHvBHOgRFPdehELbL8hRsqUSOshX5f6o14p77gLQwJnZxr_UuEH-E-6rrS0sXaHOHr3f8PGTmw_9J0kTWpbULPba30E7JYRteEY4KidTJVmUyJ49b1E2BFhPd9M8G_89DGT7td8pzOOIMzdFGeAKxp0cgqmdmkfGRzAC8Oxhzrrzdt-pbTN2_KN8s4Czib4QOMTNA3cxwqbxwA-RAAFMf72y2A6uw4ah1sJ9nO1MJT6bI4WxKHGdmh9IOSPckQTFBK8zs267s28X2bQWl7x2SYoSWT5L7JkAq8_lqq53j9A3xeVRAzrew1S2tcrIt8g4v8F2d8kUvS2K9uwxGFv3uF7UN3qraBOqw3qJU_6ZgLpyS-D1QUcAlsm85Q5pe8750STSAfeqRCwZobyh7HnnKG2gcOAgKrgDSM4YNZD_saH9X3jaqgfzRK3w9Bc-WE8SylOOPWUnPuxZnuZO2ZUQfiCv5JjP4CJ_YkDs4f3LykNuEIN8gb3krKLqOwgEn-CWuip6d__vDjoBXIAfu23EC6msDOY1ZEkFjY8bXPN2LCsK6xjioIOl7xMs1znBdkvSWruiR1QQq6csJJXn5GtEgotA8HTNT5WJE_x4RhDWePVWdkOSNj4U5dFcOeAR9Svlya31qjv3MG--Aj-Oo4zP9jtsnXyepUUp4VWUIyShK-SciW4YRus02RMEYqRshK0opLW8KKijBW_BX1JuAd1tVKlDjBOCngR9JtQuI0Z6TItmyTp5Qc1hXQNm-okLGPw-8SK1P2IVXd0QIohXX2BsI5VxwV5707sA9nkJM2JZX8bdX7Lfu4_wfe0jdO">