<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/55949">55949</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[X86] SSE4A non-temporal v16f32 vector stores don't stay on the vector unit
</td>
</tr>
<tr>
<th>Labels</th>
<td>
backend:X86
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
RKSimon
</td>
</tr>
</table>
<pre>
SSE4A has movntsd/movntss to perform fast unaligned non-temporal vector stores. But for some reason v16f32 stores get split so that every other 64-bit chunk is stored via the GPR unit (movntiq) instead of staying on the vector unit (movntsd). All other 512-bit vector types appear unaffected.
```
define void @test_zero_v16f32_align1(ptr %dst) nounwind {
store <16 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v16i32_align1(ptr %dst) nounwind {
store <16 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
!1 = !{i32 1}
```
llc -mcpu=btver1
```
test_zero_v16f32_align1: # @test_zero_v16f32_align1
xorl %eax, %eax
movntiq %rax, 24(%rdi)
movntiq %rax, 8(%rdi)
movntiq %rax, 56(%rdi)
movntiq %rax, 40(%rdi)
xorps %xmm0, %xmm0
movntsd %xmm0, 16(%rdi)
movntsd %xmm0, (%rdi)
movntsd %xmm0, 48(%rdi)
movntsd %xmm0, 32(%rdi)
retq
test_zero_v16i32_align1: # @test_zero_v16i32_align1
xorps %xmm0, %xmm0
movntsd %xmm0, 24(%rdi)
movntsd %xmm0, 16(%rdi)
movntsd %xmm0, 8(%rdi)
movntsd %xmm0, (%rdi)
movntsd %xmm0, 56(%rdi)
movntsd %xmm0, 48(%rdi)
movntsd %xmm0, 40(%rdi)
movntsd %xmm0, 32(%rdi)
retq
```
https://gcc.godbolt.org/z/v8vnWb5zj
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy9Vk1zmzAQ_TXismMGxIfhwCGJ00tvzaG9ZQQIrFQgIgma5Nd3BaSN0zgeJzNlMNbH0-q93UVLqerH4ubmOr6APTPQqam3pib0y9IyYBUMXDdKd9AwY2HsmRRtz2uYeGWVBoMPbny4HC00rq86Dpozo3qYwrSJ6AqBllswgxQWWF8Dn7h-BGX3XEMab0ocbhWiJsEAB6EdNG4mrA8XUq64JKQzcNnaABsGzhyMNQ0O8donwY4EF-szDdZ77ta8ET2HSYkaSBxYbuztE9fqdmF5O-sKCc0Gq4HQpDaW0Bx6Nfa_BBIm28vFECyCgERXYQoP0EjFLImuwVkTSFmgKWwTegUvbV3BvAWErklo2Kve8m5QmknXDZ-ta_STY7mq2O5einpPhfiMClz8vzQ4IO67cyuQD-6M9v5ADqMmZQWbrhpGxJcWcyZ8E3YsmhFyObwIjd4L_8p_uR6UlsuahLOHRfLcOkDNr4q4d3N6QdEYA-C6tUDnn0Jn54CT9Bx0HBxHo7jBLOIeui5Y1c3Nf62a-iUuPEXiEH4WOD7ljkM4Zu1ROCbh_RvpIc5ND3E8PT7qwZMZ8imHn-fCs8An8-9T0XwvXz8c_FdHxd7awWDkscjh3VaV36q6VNL6Src48oS_KZv672XydOfVRVTnUc48K6zkBUkuf2QpSXawFE08_zZ_DsC13B0URqhVT-gWK59lWO76ubatCFfevFHL4hUlYfdj6Veqw46U0_PfZtDqDldiVxgzcoONJMnj3NsXUZRmeRQwymiWZNk2TLMsynkSZTyItk3oSVZyaRx_QmnJqp-8r3FHJ4ZS1OOJggaUBmmQhXmQBZEfNk1WV3GYpmlWpk2FbwXvmJC-4-J85eliplWOrcFJKYw1fyeZMe47YXaZs89Gu1e6-Pb1RnSq92YFxUz_N107a1I">