<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/89269">89269</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Failure to access upper elements from a dereferenceable vector pointer
</td>
</tr>
<tr>
<th>Labels</th>
<td>
missed-optimization
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
RKSimon
</td>
</tr>
</table>
<pre>
```cpp
#include <cstdint>
#include <x86intrin.h>
struct PayLoad
{
int16_t x[2];
int16_t y[2];
int16_t z[2];
int16_t w[2];
};
__m128 lo(struct PayLoad &v) {
return _mm_setr_ps(v.x[0], v.y[0], v.z[0], v.w[0]);
}
__m128 hi(struct PayLoad &v) {
return _mm_setr_ps(v.x[1], v.y[1], v.z[1], v.w[1]);
}
```
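In `lo`, we successfully load the lowest element (`x[0]`) via a full `<8 x i16>` vector load, but then fail to realise that the remaining elements (`y[0]`, `z[0]`, `w[0]`) can be extracted/shuffled from that same vector load; they are still loaded as scalars and inserted one by one. The `hi` variant is worse: it doesn't realise we can just load the whole vector, so the IR contains four scalar loads and no vector load at all.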
`clang -O3 -march=x86-64-v2`
```ll
define <4 x float> @lo(ptr nocapture noundef nonnull readonly align 2 dereferenceable(16) %v) {
entry:
%0 = load <8 x i16>, ptr %v, align 2
%1 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
%y = getelementptr inbounds i8, ptr %v, i64 4
%2 = load i16, ptr %y, align 2
%z = getelementptr inbounds i8, ptr %v, i64 8
%3 = load i16, ptr %z, align 2
%w = getelementptr inbounds i8, ptr %v, i64 12
%4 = load i16, ptr %w, align 2
%5 = insertelement <4 x i16> %1, i16 %2, i64 1
%6 = insertelement <4 x i16> %5, i16 %3, i64 2
%7 = insertelement <4 x i16> %6, i16 %4, i64 3
%8 = sitofp <4 x i16> %7 to <4 x float>
ret <4 x float> %8
}
define <4 x float> @hi(ptr nocapture noundef nonnull readonly align 2 dereferenceable(16) %v) {
entry:
%arrayidx = getelementptr inbounds i8, ptr %v, i64 2
%0 = load i16, ptr %arrayidx, align 2
%arrayidx1 = getelementptr inbounds i8, ptr %v, i64 6
%1 = load i16, ptr %arrayidx1, align 2
%arrayidx3 = getelementptr inbounds i8, ptr %v, i64 10
%2 = load i16, ptr %arrayidx3, align 2
%arrayidx5 = getelementptr inbounds i8, ptr %v, i64 14
%3 = load i16, ptr %arrayidx5, align 2
%4 = insertelement <4 x i16> poison, i16 %0, i64 0
%5 = insertelement <4 x i16> %4, i16 %1, i64 1
%6 = insertelement <4 x i16> %5, i16 %2, i64 2
%7 = insertelement <4 x i16> %6, i16 %3, i64 3
%8 = sitofp <4 x i16> %7 to <4 x float>
ret <4 x float> %8
}
```
```asm
lo(PayLoad&): # @lo(PayLoad&)
movdqu (%rdi), %xmm0
pinsrw $1, 4(%rdi), %xmm0
pinsrw $2, 8(%rdi), %xmm0
pinsrw $3, 12(%rdi), %xmm0
pmovsxwd %xmm0, %xmm0
cvtdq2ps %xmm0, %xmm0
retq
hi(PayLoad&): # @hi(PayLoad&)
movdqu (%rdi), %xmm0
psrld $16, %xmm0
pinsrw $1, 6(%rdi), %xmm0
pinsrw $2, 10(%rdi), %xmm0
pinsrw $3, 14(%rdi), %xmm0
pmovsxwd %xmm0, %xmm0
cvtdq2ps %xmm0, %xmm0
retq
```
https://gcc.godbolt.org/z/3scEbnGf9
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy8V1tv4jgb_jXm5lVRbOfEBRcz7fBdfCvtanelvaxMbMAjx87YDof--pUNCQmlLXRGi6qC8x6ex-8pNnNOrrUQc5R9RdnThLV-Y-z8z___JWujJ0vDD3OUJ8e_CiVPKPmCCJW6Ui0XgOhj5TyX2iP67Zp0X-ZSeyv1dHPWiP-dt23l4Q92-M0wfpIUXzuVmdQe588e9ij7SlD2hOhr2eEd2cs7st0rWRF-w5Df83ONSQnKIFKOuQIi-RaRGfR0AQCs8K3V8FzXz054-9w4RMrtNNBPAhR5hO30MFq9jFa7fjUb8Rqy2cifZ4NHbPCIDR6xwW-x6SviuPxHgGurSji3apU6gAq0_EaAMjvhPAglaqE9MAcMghV9LAH2IHEeiiJPYCsqb2xAXrY-mGpYManAG7CCKekE-A2LEmibRtjOp4OKaVgKEHtvWeUFR2ThNu1qpQSHlTV1tHGsFieQyG4Kf28EIFKEgBawZVYy7YEb4TQihe9RdyICfG-dP2_r6Gg6rJbQHYrpNTz8TuGhZrbaIPq0L_OHPH3Ykj5UfeSUOj7gYiV1bJUU9rBShoVWApQmsfIab0GbijW-tQK0aTUXK9BG61apwJIbrQ7AlFxrIMCFFSthha4EWyqBSInzWBskG9eI0N4eEP3SVQwiWQKIPh03GRPU54c8QqBx9PHYYQ0scbQ8Rf0U5ZGL6D7Yjp82RjqjT8_D9iUlUZs-SkogWoQfZ703V91siXwOkc9a-FORBPZSL0PwHMjyckMyTyEdmJNzIALRs_bh-vZf7ocrB-b0LbiX63C7--Hw0D59C293HS-L-lI7YTvIPl99dnGEwnmMXw878JLf4iUbeKGdlyGX4hYv-cBL2nmhAy_lsV6lN6vmtXkRps5FP56MrfCvO5Vk5eV4_KCz4xT_DzubWcsOku_vLxxyfT6M66Zzf718Oim-Hz1_NWPeQcfvw9NPdE1yy1DoAV7jD6TZJ-DTW4ZED3B9--mHDTMYp8eWSToCyb1DIB14wb9kCJBfMgToLx0CN0-B8SGpXzJXH5_EF3x3_iV5PGnFw9vVDyK0PxWMjfpDX_jUZst_tEG9RCSzXAaN8IYl2b6uk7FyI7Wzu6Ccxnyl91rF_JT3WsV8YHKbWW22br_j5zAcld7Sr7ae_yCN-1DRCv_j-CSO43vzcMXoJ_LgrOJwzEN-c7byT2ULJ59L14210aXr5jx9lNdzni4aauN948KLjiwQWayraro2fGmUnxq7RmTxgsiCuurbUv9vNZvwOeUzOmMTMccFpmmOSZZNNnNc8XTGq5zylGCcM1LxcsbZkvKkLFa4nMg5SUiapLjEBSloOqWUirRYkjTFRcUFRWkiaibVVKltHbAn0rlWzMsZyWcTxZZCuXi9JqSWzgn-YBova_nCvAyDN1xEJ3YerB-W7dqFJpfOu7M_L70S8wWTKhwVvAEWL1uXN6F422GXJ4Xu2tMYqb2wk9aq-UXopN-0y2llakQWAfT09dBY811UHpFF3JFDZBE39W8AAAD__5vJdHg">