<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/130872>130872</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[aarch64] clang/LLVM fails to vectorize simple loop
</td>
</tr>
<tr>
<th>Labels</th>
<td>
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
haasn
</td>
</tr>
</table>
<pre>
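I believe this is a missed-optimization bug: clang emits fully unrolled scalar code for the loop below, which is significantly slower than the vectorized code GCC generates for the same source.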
## Code
```c
/*
 * De-interleaves the first 64 bytes of `in` into two 32-byte outputs:
 * bytes at even offsets are stored to `a`, bytes at odd offsets to `b`.
 *
 * a  - destination for in[0], in[2], ..., in[62]; 32 bytes are written.
 * b  - destination for in[1], in[3], ..., in[63]; 32 bytes are written.
 * in - source buffer; 64 bytes are read.
 *
 * All three pointers are `restrict`-qualified, so the caller asserts that
 * the accessed regions do not overlap.
 */
void read32x2(char *restrict a, char *restrict b, const char *restrict in)
{
/* Fixed trip count of 32; each iteration consumes one (even, odd) byte pair. */
for (int i = 0; i < 32; i++) {
/* Even-offset byte of the pair. */
a[i] = in[2 * i + 0];
/* Odd-offset byte of the pair. */
b[i] = in[2 * i + 1];
}
}
```
## clang trunk `-O3`:
```asm
read32x2:
ldrb w8, [x2]
ldrb w9, [x2, #1]
strb w8, [x0]
ldrb w8, [x2, #2]
strb w9, [x1]
ldrb w9, [x2, #3]
strb w8, [x0, #1]
ldrb w8, [x2, #4]
strb w9, [x1, #1]
ldrb w9, [x2, #5]
strb w8, [x0, #2]
ldrb w8, [x2, #6]
strb w9, [x1, #2]
ldrb w9, [x2, #7]
strb w8, [x0, #3]
ldrb w8, [x2, #8]
strb w9, [x1, #3]
ldrb w9, [x2, #9]
strb w8, [x0, #4]
ldrb w8, [x2, #10]
strb w9, [x1, #4]
ldrb w9, [x2, #11]
strb w8, [x0, #5]
ldrb w8, [x2, #12]
strb w9, [x1, #5]
ldrb w9, [x2, #13]
strb w8, [x0, #6]
ldrb w8, [x2, #14]
strb w9, [x1, #6]
ldrb w9, [x2, #15]
strb w8, [x0, #7]
ldrb w8, [x2, #16]
strb w9, [x1, #7]
ldrb w9, [x2, #17]
strb w8, [x0, #8]
ldrb w8, [x2, #18]
strb w9, [x1, #8]
ldrb w9, [x2, #19]
strb w8, [x0, #9]
ldrb w8, [x2, #20]
strb w9, [x1, #9]
ldrb w9, [x2, #21]
strb w8, [x0, #10]
ldrb w8, [x2, #22]
strb w9, [x1, #10]
ldrb w9, [x2, #23]
strb w8, [x0, #11]
ldrb w8, [x2, #24]
strb w9, [x1, #11]
ldrb w9, [x2, #25]
strb w8, [x0, #12]
ldrb w8, [x2, #26]
strb w9, [x1, #12]
ldrb w9, [x2, #27]
strb w8, [x0, #13]
ldrb w8, [x2, #28]
strb w9, [x1, #13]
ldrb w9, [x2, #29]
strb w8, [x0, #14]
ldrb w8, [x2, #30]
strb w9, [x1, #14]
ldrb w9, [x2, #31]
strb w8, [x0, #15]
ldrb w8, [x2, #32]
strb w9, [x1, #15]
ldrb w9, [x2, #33]
strb w8, [x0, #16]
ldrb w8, [x2, #34]
strb w9, [x1, #16]
ldrb w9, [x2, #35]
strb w8, [x0, #17]
ldrb w8, [x2, #36]
strb w9, [x1, #17]
ldrb w9, [x2, #37]
strb w8, [x0, #18]
ldrb w8, [x2, #38]
strb w9, [x1, #18]
ldrb w9, [x2, #39]
strb w8, [x0, #19]
ldrb w8, [x2, #40]
strb w9, [x1, #19]
ldrb w9, [x2, #41]
strb w8, [x0, #20]
ldrb w8, [x2, #42]
strb w9, [x1, #20]
ldrb w9, [x2, #43]
strb w8, [x0, #21]
ldrb w8, [x2, #44]
strb w9, [x1, #21]
ldrb w9, [x2, #45]
strb w8, [x0, #22]
ldrb w8, [x2, #46]
strb w9, [x1, #22]
ldrb w9, [x2, #47]
strb w8, [x0, #23]
ldrb w8, [x2, #48]
strb w9, [x1, #23]
ldrb w9, [x2, #49]
strb w8, [x0, #24]
ldrb w8, [x2, #50]
strb w9, [x1, #24]
ldrb w9, [x2, #51]
strb w8, [x0, #25]
ldrb w8, [x2, #52]
strb w9, [x1, #25]
ldrb w9, [x2, #53]
strb w8, [x0, #26]
ldrb w8, [x2, #54]
strb w9, [x1, #26]
ldrb w9, [x2, #55]
strb w8, [x0, #27]
ldrb w8, [x2, #56]
strb w9, [x1, #27]
ldrb w9, [x2, #57]
strb w8, [x0, #28]
ldrb w8, [x2, #58]
strb w9, [x1, #28]
ldrb w9, [x2, #59]
strb w8, [x0, #29]
ldrb w8, [x2, #60]
strb w9, [x1, #29]
ldrb w9, [x2, #61]
strb w8, [x0, #30]
ldrb w8, [x2, #62]
strb w9, [x1, #30]
ldrb w9, [x2, #63]
strb w8, [x0, #31]
strb w9, [x1, #31]
ret
```
## GCC trunk `-O3`:
```asm
read32x2:
ld2 {v28.16b - v29.16b}, [x2], 32
ld2 {v30.16b - v31.16b}, [x2]
stp q28, q30, [x0]
stp q29, q31, [x1]
ret
```
## See Also
https://godbolt.org/z/5aWdbjTEx
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJykmV2P4jYUhn-NubEGOcdxPi5yAUOpKm3Vi1bttZN4wFtPwsaGnd1fXzksAxPn46w6QkoG7OOHwzx5TUZaqw-NUgURWyJ2K3l2x7YrjlLaZlW29bfiN1oqo9VFUXfUlrqWlopKWp4PBJ6ptNQdFbWVNLKjF9VZ3TZUW-rL6hddycaZb9Sa9qvq1oRt_AM4AU6f21r53xJ2fVSEbS6trmmnZM3hDQhk1VF2lMCmU9Z1unJU-kWDZ8v-2baxLnxNNwRyv066JWxDKaUvrR-R6cZRTQnfUUb4tj99phz6cwLb_pHT-zT_I4nYaiJ2_TTdELEFv5ifDFvKiNgR_ji-nB0ffRhP0l2PuXtsyoeOVUY2B-q6c_MvJQl7-oP7EXzzOEHaV8I27y3sX7zRmLor_fFr5vtFxPYNPMFgwNf8_qo_AR59HGXdsAwbKTNY51rptt57ife1ookSAQxfhhmlnoaKJyo-sE1XDAEFFnCs-aOAyUzX5ksFcOkIXEjGsWQZtnW3itNkObZtMRYuYli6qZKhCdHwowjwBBoPsHhTJUM8tBzJ8PMI8dBaJGg8tBopuonLbkzVCvHG5BjFy9B4aEGyRUEitCE5-nKMNmSqZEAJy4YMvZzhQysyWTMExAdItCgJ4LMDHXCAtmR4BZnhXNZkslgIiPZkeD2aAUSLEi1HCaBNGV7kpgE5WpXJmuFWBuEKOk6Ge4AZQHSe_MRmazlQfmKfhU4U_GZreHGf4US4gs6U0Q3XOCA6VPC7rmg5VfD7rmH-zGyo8a6gcyVedmWYZjOAaFcma4aAaFdgOVditCuAzpUY_8UEnSsx4qsJOlditCvDDJ8BRLsCy7kSo10Z7gumAQXalcma4TdQhCvoXBF4V9C5IvCuLOeKwLuCzhWBdwWdKwLhCjpXBN4VdK4IvCvLuSLwrqBzJcG7gs6VZNmV4dZvBhDtymTNEBDtSridDJcdpEWn3OTdxl-fn__XvUbojyTdXiBbR0lJn-gFcn9G0t39XYr-nMP1z-k-ibPbJB6NTXrsyKk_foG-JV84u3fm1pDbiPw6Iro3Bt2PP5WiG2NbwjZH507Wv1_YE9gf2rpsjVu33YHA_juBvZD_1OXnv355W9UFr3Oey5UqojSO0pjxBFbHosxTIWPFsgxApDyVVV3GacUBkqSEqlrpAhgIxiNgLE5YtI5EmijImMhjCS8qIzFTr1KbtTGXV7_2Slt7VkXEWZbCyshSGXu7rd8VftRTeT5YEjOjrbP3eU470_8DQMquOib-cnq91Uxg_-nT37_TF6lNf-f_oirXdvq7ola_noyipm1Pq3NnikFHtDuey3XVvhLY-3V-HJ5OXftZVY7Avoe1BPY_eC8F_BcAAP__B6q42g">