[llvm] [X86] Adding lowerings for vector ISD::LRINT and ISD::LLRINT (PR #90065)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 1 07:57:38 PDT 2024
fhahn wrote:
It looks like this change is causing different results in some cases when using `@llvm.lrint.v4i64.v4f32`.
For
```
define i32 @foo(ptr %dst, <4 x float> %in) {
entry:
%0 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %in)
%1 = trunc <4 x i64> %0 to <4 x i32>
store <4 x i32> %1, ptr %dst, align 4
ret i32 0
}
```
we now generate the assembly below, which converts the floats directly to signed doublewords (32-bit integers), without going through a 64-bit intermediate result:
```
_foo: ## @foo
cvtps2dq xmm0, xmm0
movupd xmmword ptr [rdi], xmm0
xor eax, eax
ret
```
Before this change, we generated the assembly below, which converts each element to a signed quadword (64-bit integer) and then truncates the result to 32 bits:
```
_foo: ## @foo
movaps xmm1, xmm0
shufps xmm1, xmm0, 85 ## xmm1 = xmm1[1,1],xmm0[1,1]
cvtss2si rax, xmm1
cvtss2si rcx, xmm0
movaps xmm1, xmm0
unpckhpd xmm1, xmm0 ## xmm1 = xmm1[1],xmm0[1]
cvtss2si rdx, xmm1
shufps xmm0, xmm0, 255 ## xmm0 = xmm0[3,3,3,3]
cvtss2si rsi, xmm0
movd xmm0, esi
movd xmm1, edx
punpckldq xmm1, xmm0 ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
movd xmm0, ecx
movd xmm2, eax
punpckldq xmm0, xmm2 ## xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
punpcklqdq xmm0, xmm1 ## xmm0 = xmm0[0],xmm1[0]
movdqu xmmword ptr [rdi], xmm0
xor eax, eax
ret
```
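I might be missing something, but I think for inputs like 2^33 the new codegen will overflow, while the old codegen won't: 2^33 does not fit in the 32-bit result of `cvtps2dq`, whereas the old sequence first converts to a 64-bit integer (where 2^33 is representable) and only then truncates.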
See https://llvm.godbolt.org/z/E7W89q59M for a comparison of `main` vs `18.x`.
https://github.com/llvm/llvm-project/pull/90065
More information about the llvm-commits
mailing list