[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
Riyaz Puthiyapurayil via llvm-dev
llvm-dev at lists.llvm.org
Tue Jan 26 11:29:32 PST 2021
Apologies, outlook messed up the formatting in the previous email. Here is it again.
Do you see any UB in the following modified test? Here also, I see a difference between foo3 and foo4. Again, trunk and gcc match. But not Clang-10.
#include <stdio.h>
#include <stdint.h>
#include <string.h>
__attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1)
{
unsigned chunk = 34 >> 5;
unsigned bitIndex = 34 - chunk * 32;
unsigned* arr = (unsigned *)arr + chunk;
__int128_t v1 = *arr1;
v1 = v1 >> bitIndex;
return (uint64_t) v1;
}
__attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned index)
{
unsigned chunk = index >> 5;
unsigned bitIndex = index - chunk * 32;
unsigned *arr = (unsigned *)arr + chunk;
__int128_t v1 = *arr1;
v1 = v1 >> bitIndex;
return (uint64_t) v1;
}
int main() {
unsigned arr[4];
arr[0]= 0x6f7cfd6f;
arr[1]= 0xd96c9806;
arr[2]= 0x89420144;
arr[3]= 0x8a20f548;
__int128_t arr1;
memcpy(&arr1, arr, sizeof(arr1));
printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff);
printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff);
}
From: Riyaz Puthiyapurayil
Sent: Tuesday, January 26, 2021 11:25 AM
To: 'Roman Lebedev' <lebedev.ri at gmail.com>
Cc: llvm-dev at lists.llvm.org
Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
Do you see any UB in the following modified test? Here also, I see a difference between foo3 and foo4. Again, trunk and gcc match. But not Clang-10
#include <stdio.h>
#include <stdint.h>
#include <string.h>
__attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1)
{
unsigned chunk = 34 >> 5;
unsigned bitIndex = 34 - chunk * 32;
unsigned* arr = (unsigned *)arr + chunk;
__int128_t v1 = *arr1;
v1 = v1 >> bitIndex;
return (uint64_t) v1;
}
__attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned index)
{
unsigned chunk = index >> 5;
unsigned bitIndex = index - chunk * 32;
unsigned *arr = (unsigned *)arr + chunk;
__int128_t v1 = *arr1;
v1 = v1 >> bitIndex;
return (uint64_t) v1;
}
int main() {
unsigned arr[4];
arr[0]= 0x6f7cfd6f;
arr[1]= 0xd96c9806;
arr[2]= 0x89420144;
arr[3]= 0x8a20f548;
__int128_t arr1;
memcpy(&arr1, arr, sizeof(arr1));
printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); // prints 1365b2601 (clang-10) 19bdf3f5b (gcc, clang-12)
printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); // prints 19bdf3f5b
}
-----Original Message-----
From: Riyaz Puthiyapurayil
Sent: Tuesday, January 26, 2021 11:10 AM
To: 'Roman Lebedev' <lebedev.ri at gmail.com<mailto:lebedev.ri at gmail.com>>
Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
You may be right about that the cast from unsigned* to __int128_t* being UB. But I think that has nothing to do with the bug. That code can be fixed to use a __int128_t array and change all the casts from __int128_t* to unsigned* (instead of the other way around) and we would still have the issue.
-----Original Message-----
From: Roman Lebedev <lebedev.ri at gmail.com<mailto:lebedev.ri at gmail.com>>
Sent: Tuesday, January 26, 2021 10:34 AM
To: Riyaz Puthiyapurayil <riyaz at synopsys.com<mailto:riyaz at synopsys.com>>
Cc: llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>
Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
On Tue, Jan 26, 2021 at 9:28 PM Riyaz Puthiyapurayil via llvm-dev <llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>> wrote:
>
> It appears that LLVM 10 is miscompiling the following test on x86_64 but the trunk version seems to work fine. Before I spend any time debugging this, it would be helpful if someone can point me to a bug fix if there was one.
>
>
>
> #include <stdio.h>
>
> #include <stdint.h>
>
>
>
> __attribute__ ((noinline)) uint64_t foo4(unsigned* arr)
>
> {
>
> unsigned chunk = 34 >> 5;
>
> unsigned bitIndex = 34 - chunk * 32;
>
>
>
> arr = arr + chunk;
>
> __int128_t* arr1 = (__int128_t*) arr;
I'm pretty sure this is UB, because the alignment of __int128_t is bigger than that of unsigned.
>
> __int128_t v1 = *arr1;
>
> v1 = v1 >> bitIndex;
>
>
>
> return (uint64_t) v1;
>
> }
>
>
>
> __attribute__ ((noinline)) uint64_t foo3(unsigned* arr, unsigned
> index)
>
> {
>
> unsigned chunk = index >> 5;
>
> unsigned bitIndex = index - chunk * 32;
>
>
>
> arr = arr + chunk;
>
> __int128_t* arr1 = (__int128_t*) arr;
>
> __int128_t v1 = *arr1;
>
> v1 = v1 >> bitIndex;
>
>
>
> return (uint64_t) v1;
>
> }
>
>
>
>
>
> int main() {
>
>
>
> unsigned arr[5];
>
>
>
> arr[0]= 0x6f7cfd6f;
>
> arr[1]= 0xd96c9806;
>
> arr[2]= 0x89420144;
>
> arr[3]= 0x8a20f548;
>
>
>
> printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints
> 222508051 instead of 1365b2601
>
> printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints 1365b2601
> as expected
>
> }
>
>
>
> If you look at the assembly, there is an ‘and cl, 31’ missing in foo3 which is needed to make sure that the shift count is in the range [0...31]. This is missing in the generated code. Note that if the count in CL register is greater than 31, x86_64 manual states that the behavior of shrd is undefined.
>
>
>
> # clang 10
>
> foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int)
>
> mov ecx, esi
>
> mov edx, esi
>
> shr edx, 5
>
> mov rax, qword ptr [rdi + 4*rdx]
>
> mov rdx, qword ptr [rdi + 4*rdx + 8]
>
> shrd rax, rdx, cl
>
> ret
>
>
>
> # Trunk
>
> foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int)
>
> mov ecx, esi
>
> mov edx, esi
>
> shr edx, 5
>
> mov rax, qword ptr [rdi + 4*rdx]
>
> mov rdx, qword ptr [rdi + 4*rdx + 8]
>
> and cl, 31
>
> shrd rax, rdx, cl
>
> ret
>
Roman.
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>
> https://urldefense.com/v3/__https://lists.llvm.org/cgi-bin/mailman/lis<https://urldefense.com/v3/__https:/lists.llvm.org/cgi-bin/mailman/lis>
> tinfo/llvm-dev__;!!A4F2R9G_pg!Mq1ZgXCY7kwDMwNwdn4Tcz-KgkoXMlP_rjq7_U9M
> rh1gbrtqvDoSi6cMuSLZaXIPgtijPW55JJ_8$
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20210126/33600961/attachment-0001.html>
More information about the llvm-dev
mailing list