<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/131097>131097</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Missed Optimization: Failing to coalesce multiple stores from `memcpy()`
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
pan-0
</td>
</tr>
</table>
<pre>
Given:
```c
void f(uint8_t *out, uint8_t x, uint8_t y)
{
const uint8_t in[] = {x, y};
memcpy(out, in, sizeof in);
}
```
Clang generates:
```llvm
define dso_local void @f(ptr noundef writeonly captures(none) initializes((0, 2)) %out, i8 noundef zeroext %x, i8 noundef zeroext %y) local_unnamed_addr {
entry:
store i8 %x, ptr %out, align 1
%in.sroa.4.0..sroa_idx = getelementptr inbounds nuw i8, ptr %out, i64 1
store i8 %y, ptr %in.sroa.4.0..sroa_idx, align 1
ret void
}
```
With a curious workaround:
```c
inline static void *memcpy2(void *restrict out, const void *restrict in)
{
__builtin_memcpy_inline(out, in, 2);
return out;
}
#define xmemcpy(out, in, size) \
((size) == 2 ? memcpy2((out), (in)) : memcpy((out), (in), (size)))
void g(uint8_t *out, uint8_t x, uint8_t y)
{
const uint8_t in[] = {x, y};
xmemcpy(out, in, sizeof in);
}
```
It generates:
```llvm
define dso_local void @g(ptr noundef writeonly captures(none) initializes((0, 2)) %out, i8 noundef zeroext %x, i8 noundef zeroext %y) local_unnamed_addr {
entry:
%in.sroa.4.0.insert.ext = zext i8 %y to i16
%in.sroa.4.0.insert.shift = shl nuw i16 %in.sroa.4.0.insert.ext, 8
%in.sroa.0.0.insert.ext = zext i8 %x to i16
%in.sroa.0.0.insert.insert = or disjoint i16 %in.sroa.4.0.insert.shift, %in.sroa.0.0.insert.ext
store i16 %in.sroa.0.0.insert.insert, ptr %out, align 1
ret void
}
```
This affects codegen. On `x86_64`:
```
f:
mov byte ptr [rdi], sil
mov byte ptr [rdi + 1], dl
ret
g:
shl edx, 8
or edx, esi
mov word ptr [rdi], dx
ret
```
`armv8-a`:
```
f:
strb w1, [x0]
strb w2, [x0, #1]
ret
g:
bfi w1, w2, #8, #24
strh w1, [x0]
ret
```
[Godbolt](https://godbolt.org/z/3rW7Maxrs).
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzUVk2P4jgQ_TXmYjVyHEKSQw4wiNEeRnucI3JiJ9SsYyPbaT5-_cp2aNJ89I600kqLEAmu8qtX5VeVMGuhU0JUKFujbDNjg9trUx2YeiOzWvNz9R3ehULpCpHwXZL4bRBZvWvguEW0GEC5Yucwois9OES_4evKafrnjGjpMfJ1ACsbraz7sIKKHDBKNxjl67D1jPINSkf_XvTN4YxoMQYB5X8tXIRuw59ydM03U6qR-TfJVIc7oYRhTtjHjKR87xFZcdGCEphbvZO6YRKHLNGC-EQPzmClB8VFi48GnNBKnnHDDm4wwiJaKK0EoiUGBQ6YhEtYRbQgnir1FGmJEc2uKRQfeBdhtDj5Iman1yZfQxyI7QalWC_4jnFucCyqUM6cY2oYW6eN8DBXRM_-FppJ6BROgiuiGai5NZrNF3MyD3c74KdwFp1wQopeKOcBQNWelcVqOGIoHnFhuRhRpwTOE8enoe4pGeFC6V-d509we8xwMxjQg8VHbf5ixjN7rlVQ0h-rdcxBM54pXUVFUUSL64oR1hloHB6ziRp9sEa1TbS829UDSAdqFzF3MeC9WOlNpKQ0wg1GhUhT3ZIVoukow9NrzQchZd8iVBTZx3K68QdHMUq3-JbjFaX0EIgWMYngv8IfgV54hfsxwJj7OAG6_24CfFGO3xgBf7h_0__d_6z_7xoNlBXGzQNMusEXfzO2JnYaQ7J8NgnGXXYPbdxn9zJ2frJ8HcHTL-7hyJckTi9ITHbFS9ioDeZgf2lQ7gsmgXVU7gsW0zH1GeYh7tcD9B-n1R6aPWZtKxpncaO56PxTdY3RkpyK5W658K4PmkRk1V7Hefz0-j1c67MTkU-2NhxQtomNIL3vCyeM6BonoyuXE1QjXIzcfY7mT9t_RJzPxcSkDZ6ahIUnLI_a8EeW_ORdx5if64SWhJm-eH9jv1kP60wdIiXhoLP1ifgodw5HerMGPaTJZ69XBahbwDf4EYamxXilC-9rndk_5_A8x2z9XfNaSxfqUeydO4R5RLeIbrtommvTIbq9ILpNzc_8BzsZi2g5n_Eq5WVaspmoknyRFFmaUjLbV2WbZBlvCM_rmtd5Tco8F6RsWlZS1uR8BhUlNCNpkpKCFAsyb_J8WTciWS4ytmA8QQsiegZy7iehjz8DawdRJWlCynwmWS2kDS-KlCpxxMGKKPXvjabym97qobNoQSRYZ28wDpwU1Q-wVnD858FBDxfmQPvXSrxlIEF1vv0bzaSwjcD9IB0cpIitaXFrdO87ZfKMKtGSzAYjq7vqgdsP9bzRPaLbMNHj5e1g9C_ROES3gbZFdDvm9V7RvwMAAP__5UU0tA">