<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/119561>119561</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            overeager autovectorization
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          purplesyringa
      </td>
    </tr>
</table>

<pre>
```c
/*
 * Reproducer: sums the first n elements of state, but only for n <= 2.
 *
 * state: array read at indices 0 .. n-1; it is never written through.
 * n:     number of elements to add up.
 *
 * Returns 0 when n > 2 (and also when n == 0, since the loop body never
 * runs); otherwise state[0] for n == 1, or state[0] + state[1] for n == 2,
 * using ordinary unsigned (wrapping) addition.
 */
unsigned actually_how(unsigned *state, unsigned n) {
    // Guard clause: past this point n can only be 0, 1 or 2.
    if (n > 2) {
        return 0;
    }
    unsigned sum = 0;
    // Because of the guard above, this loop executes at most twice.
    for (unsigned i = 0; i < n; i++) {
        sum += state[i];
    }
 return sum;
}
```

On the current trunk, under `-O2 -march=alderlake`, this compiles to:

```x86asm
.LCPI0_0:
        .quad -9223372036854775808
        .quad   -9223372036854775808
.LCPI0_1:
 .quad   -9223372036854775808
        .quad -9223372036854775807
actually_how(unsigned int*, unsigned int):
        lea ecx, [rsi - 1]
        xor     eax, eax
        cmp     ecx, 1
 ja      .LBB0_2
        mov     eax, esi
        dec     rax
        vmovq xmm0, rax
        vpbroadcastq    xmm0, xmm0
        vpxor   xmm0, xmm0, xmmword ptr [rip + .LCPI0_0]
        vmovdqa xmm1, xmmword ptr [rip + .LCPI0_1]
        vpcmpgtq        xmm0, xmm1, xmm0
        vpcmpeqd xmm1, xmm1, xmm1
        vpxor   xmm0, xmm0, xmm1
        vinsertps xmm0, xmm0, xmm0, 156
        vpmaskmovd      xmm0, xmm0, xmmword ptr [rdi]
        vpshufd xmm1, xmm0, 85
        vpaddd  xmm0, xmm0, xmm1
 vmovd   eax, xmm0
.LBB0_2:
        ret
```

I'm having trouble imagining the conditions under which vectorizing a loop like this, which runs at most twice, is a good idea.

Even worse, LLVM sometimes emits a *loop* that can run at most once. For example, with `-O2 -march=znver4`:

```c
/*
 * Reproducer (pointer-walking variant): sums the n elements starting at p,
 * but only for n <= 2.
 *
 * p: pointer to the first element; elements p[0] .. p[n-1] are read and
 *    never written through.
 * n: number of elements to add up.
 *
 * Returns 0 when n > 2 (and also when n == 0, since p already equals end);
 * otherwise p[0] for n == 1, or p[0] + p[1] for n == 2, using ordinary
 * unsigned (wrapping) addition.
 */
unsigned long actually_how(unsigned long* p, unsigned long n) {
    // Guard clause: past this point n can only be 0, 1 or 2.
    if (n > 2) {
 return 0;
    }
    // One-past-the-last element to read; equals p when n == 0.
    unsigned long* end = p + n;
    unsigned long sum = 0;
    // Because of the guard above, this scalar loop executes at most twice.
    while (p != end) {
        sum += *p++;
    }
    return sum;
}
```

```x86asm
.LCPI0_1:
        .byte   0
        .byte 1
actually_how(unsigned long*, unsigned long):
        lea     rax, [rsi - 3]
        cmp     rax, -2
        jae     .LBB0_3
        xor     eax, eax
        ret
.LBB0_3:
        vpmovsxbq       xmm2, word ptr [rip + .LCPI0_1]
        movabs  rax, 2305843009213693951
        vpxor   xmm3, xmm3, xmm3
        add     rsi, rax
        mov     al, 61
        bzhi rcx, rsi, rax
        and     esi, 1
        neg     rsi
 vpbroadcastq    xmm0, rcx
        lea     rax, [rcx + rsi + 2]
        xor ecx, ecx
.LBB0_4:
        vmovdqa xmm1, xmm3
        vpbroadcastq xmm3, rcx
        vpor    xmm3, xmm3, xmm2
        vpcmpleuq       k1, xmm3, xmm0
        vmovdqu64       xmm3 {k1} {z}, xmmword ptr [rdi + 8*rcx]
        add     rcx, 2
        vpaddq  xmm3, xmm3, xmm1
        cmp rax, rcx
        jne     .LBB0_4
        vmovdqa64       xmm1 {k1}, xmm3
        vpshufd xmm0, xmm1, 238
        vpaddq  xmm0, xmm1, xmm0
 vmovq   rax, xmm0
        ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJyMV0tv4joU_jVmcwTyIwmwYAHtVBqpV3NXdzsysSFuEzvYTmj7668ch0B4zLRCsuXzPj7-vpQ7p_ZayhVKNyh9nvDGF8au6sbWpXSfVuk9n2yN-FyhDMdfjvC60Z2ZAJ77hpfl5-_CHBFdDOeIrp3nXiL6BMOhRnQJaL5BeA0AoHaA6EIDYj-AjkXhz0rfWA0YsdMpmj_3u8GlaypA7PlSa2csXKaiBoVu-wS62yK66X7XYTuPdBNsYgXpRqH0-SaLPj3XVFEUj4cuhT1e_9LgCwl5Y63UHrxt9HtsiZAWUIanvyhMK27zArFnXgppS_4uUYbBF8pBbqpaldKBN4ito88hxMci465CeD17ffr3J_6No8qpkNmh4QKmS0oZm1PMskWazOfpAi9ulOCRWu-Z9J7_ov3XyHOE148mRmmP6Ho0L93RclxVKTnI_CPooXRjnYIpkHA_Z5UPY7tV8k4tLGdhXtVRGH2QIHrjfd6vmw3-TS-0K9OOXDl1IRQyj4M6CtBWpj3AR1XhYHElq7fWcJFz5w9dpr1Wt16qxRJG4rgejRVQe9sVr-owqTBc_6gLIQ9x4MGI_N34qoVtnVf1PiZ5lSi5m3Be1fIgLuXn9VuFjdSUdtL62t1T7FaSZiO3FXfvod6bbO_2Tajral3R7MS4OvoEi3SkxIUQj3Nv-_j9qPQdOo3UaIat9LdQ8RPReQUFb5Xeg7em2ZYSVMX3SoeTiBjHQuUF5EYL5ZXRDlqZe2PVV1DhUBpTgy-4B9toB9xDZZwHf1S5hFK9y4gqygGHvTEClJB8FuP_aKWGo7Guw-zX1__-AWcq6VUlHchK-WCE6DrEQHQdw-Rch1BDJKNzObvBtS_dSpuESm8wbEQmpQlVPMCHIAxx6xFEdCbf5JVvEcopjNSi4434SvTZZhz6Dv8cC1XKkEEwJUEotfgjzyC6rns6upPat4jmISmQK1LYfnoJAPjmjPwBm_um3HT-LjqfIPESodn4wZ1AuFebXkLuG5cXYMy-i-vxTZ2MRkm1dWVa97E9nMGMBgffh8PKtHzrhnwpw-kiYRgvKWHZki3TByDHeiQ4r2c1LiJaWaduieLEO7wMsuzS_farUGAje9215To6llF4aavlfogZIOsBHQXvf7zT_KNrVrjbsNJb_u3pVXae4q0kV7dyy0_sEVWeWjhOrK3jQNxpNL0mp1I2p-t_J1fK-CarJkvOs8LCw30naP4cNl_h-d3llK4VC0TXIctRQ4arjk2h16xyuFsDuXowff_HPXjTl68lue3vZSVkqORuwwcSHFE9ZYv7-d79IIhfP8OwXHX3ivgmYsXEki35RK7InLElzXBKJ8VKsp1kImM7shMJZXMqqchIShKKxY5u6UStKKYJoYSQBM8ZneEloRlJEkzIMiHbHCVYVlyVs7Jsq5mx-4lyrpErQpZpRiYl38rSdf_0UKrlETopomGSJ3YVjKbbZu9QgkvlvDu78cqXcmVaaSXfSwu88eZEwjxw8qSx5arwvnZh3ukLoi975YtmO8tNhehL8NQv09qaN5l7RF-6-A7Rlz7BdkX_DwAA__89dr-0">