<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/119561>119561</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
overeager autovectorization
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
purplesyringa
</td>
</tr>
</table>
<pre>
```c
/*
 * Reproducer: sums the first n elements of state, for n of at most 2.
 * Returns 0 when n is greater than 2; otherwise returns the sum of
 * state[0] through state[n-1] (which is 0 when n is 0).
 */
unsigned actually_how(unsigned *state, unsigned n) {
// This early exit bounds the loop below to at most two iterations.
if (n > 2) {
return 0;
}
unsigned sum = 0;
for (unsigned i = 0; i < n; i++) {
sum += state[i];
}
return sum;
}
```
On the current trunk, under `-O2 -march=alderlake` this compiles to:
```x86asm
.LCPI0_0:
.quad -9223372036854775808
.quad -9223372036854775808
.LCPI0_1:
.quad -9223372036854775808
.quad -9223372036854775807
actually_how(unsigned int*, unsigned int):
lea ecx, [rsi - 1]
xor eax, eax
cmp ecx, 1
ja .LBB0_2
mov eax, esi
dec rax
vmovq xmm0, rax
vpbroadcastq xmm0, xmm0
vpxor xmm0, xmm0, xmmword ptr [rip + .LCPI0_0]
vmovdqa xmm1, xmmword ptr [rip + .LCPI0_1]
vpcmpgtq xmm0, xmm1, xmm0
vpcmpeqd xmm1, xmm1, xmm1
vpxor xmm0, xmm0, xmm1
vinsertps xmm0, xmm0, xmm0, 156
vpmaskmovd xmm0, xmm0, xmmword ptr [rdi]
vpshufd xmm1, xmm0, 85
vpaddd xmm0, xmm0, xmm1
vmovd eax, xmm0
.LBB0_2:
ret
```
I'm having trouble imagining conditions under which vectorizing a loop like this, which runs at most twice, is a good idea.
Even worse, LLVM sometimes emits a vectorized *loop* that can run at most once. For example, with `-O2 -march=znver4`:
```c
/*
 * Reproducer: pointer-walking variant of the bounded sum.
 * Returns 0 when n is greater than 2; otherwise returns the sum of the
 * n elements starting at p (which is 0 when n is 0).
 */
unsigned long actually_how(unsigned long* p, unsigned long n) {
// This early exit bounds the loop below to at most two iterations.
if (n > 2) {
return 0;
}
// One past the last element to read; equals p when n is 0, so the loop is skipped.
unsigned long* end = p + n;
unsigned long sum = 0;
while (p != end) {
sum += *p++; // add the current element, then advance p
}
return sum;
}
```
```x86asm
.LCPI0_1:
.byte 0
.byte 1
actually_how(unsigned long*, unsigned long):
lea rax, [rsi - 3]
cmp rax, -2
jae .LBB0_3
xor eax, eax
ret
.LBB0_3:
vpmovsxbq xmm2, word ptr [rip + .LCPI0_1]
movabs rax, 2305843009213693951
vpxor xmm3, xmm3, xmm3
add rsi, rax
mov al, 61
bzhi rcx, rsi, rax
and esi, 1
neg rsi
vpbroadcastq xmm0, rcx
lea rax, [rcx + rsi + 2]
xor ecx, ecx
.LBB0_4:
vmovdqa xmm1, xmm3
vpbroadcastq xmm3, rcx
vpor xmm3, xmm3, xmm2
vpcmpleuq k1, xmm3, xmm0
vmovdqu64 xmm3 {k1} {z}, xmmword ptr [rdi + 8*rcx]
add rcx, 2
vpaddq xmm3, xmm3, xmm1
cmp rax, rcx
jne .LBB0_4
vmovdqa64 xmm1 {k1}, xmm3
vpshufd xmm0, xmm1, 238
vpaddq xmm0, xmm1, xmm0
vmovq rax, xmm0
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJyMV0tv4joU_jVmcwTyIwmwYAHtVBqpV3NXdzsysSFuEzvYTmj7668ch0B4zLRCsuXzPj7-vpQ7p_ZayhVKNyh9nvDGF8au6sbWpXSfVuk9n2yN-FyhDMdfjvC60Z2ZAJ77hpfl5-_CHBFdDOeIrp3nXiL6BMOhRnQJaL5BeA0AoHaA6EIDYj-AjkXhz0rfWA0YsdMpmj_3u8GlaypA7PlSa2csXKaiBoVu-wS62yK66X7XYTuPdBNsYgXpRqH0-SaLPj3XVFEUj4cuhT1e_9LgCwl5Y63UHrxt9HtsiZAWUIanvyhMK27zArFnXgppS_4uUYbBF8pBbqpaldKBN4ito88hxMci465CeD17ffr3J_6No8qpkNmh4QKmS0oZm1PMskWazOfpAi9ulOCRWu-Z9J7_ov3XyHOE148mRmmP6Ho0L93RclxVKTnI_CPooXRjnYIpkHA_Z5UPY7tV8k4tLGdhXtVRGH2QIHrjfd6vmw3-TS-0K9OOXDl1IRQyj4M6CtBWpj3AR1XhYHElq7fWcJFz5w9dpr1Wt16qxRJG4rgejRVQe9sVr-owqTBc_6gLIQ9x4MGI_N34qoVtnVf1PiZ5lSi5m3Be1fIgLuXn9VuFjdSUdtL62t1T7FaSZiO3FXfvod6bbO_2Tajral3R7MS4OvoEi3SkxIUQj3Nv-_j9qPQdOo3UaIat9LdQ8RPReQUFb5Xeg7em2ZYSVMX3SoeTiBjHQuUF5EYL5ZXRDlqZe2PVV1DhUBpTgy-4B9toB9xDZZwHf1S5hFK9y4gqygGHvTEClJB8FuP_aKWGo7Guw-zX1__-AWcq6VUlHchK-WCE6DrEQHQdw-Rch1BDJKNzObvBtS_dSpuESm8wbEQmpQlVPMCHIAxx6xFEdCbf5JVvEcopjNSi4434SvTZZhz6Dv8cC1XKkEEwJUEotfgjzyC6rns6upPat4jmISmQK1LYfnoJAPjmjPwBm_um3HT-LjqfIPESodn4wZ1AuFebXkLuG5cXYMy-i-vxTZ2MRkm1dWVa97E9nMGMBgffh8PKtHzrhnwpw-kiYRgvKWHZki3TByDHeiQ4r2c1LiJaWaduieLEO7wMsuzS_farUGAje9215To6llF4aavlfogZIOsBHQXvf7zT_KNrVrjbsNJb_u3pVXae4q0kV7dyy0_sEVWeWjhOrK3jQNxpNL0mp1I2p-t_J1fK-CarJkvOs8LCw30naP4cNl_h-d3llK4VC0TXIctRQ4arjk2h16xyuFsDuXowff_HPXjTl68lue3vZSVkqORuwwcSHFE9ZYv7-d79IIhfP8OwXHX3ivgmYsXEki35RK7InLElzXBKJ8VKsp1kImM7shMJZXMqqchIShKKxY5u6UStKKYJoYSQBM8ZneEloRlJEkzIMiHbHCVYVlyVs7Jsq5mx-4lyrpErQpZpRiYl38rSdf_0UKrlETopomGSJ3YVjKbbZu9QgkvlvDu78cqXcmVaaSXfSwu88eZEwjxw8qSx5arwvnZh3ukLoi975YtmO8tNhehL8NQv09qaN5l7RF-6-A7Rlz7BdkX_DwAA__89dr-0">