[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`

Craig Topper via llvm-dev llvm-dev at lists.llvm.org
Tue Jan 26 12:06:08 PST 2021


Which was flagged in post-commit review about a year later.
https://reviews.llvm.org/D58475

~Craig


On Tue, Jan 26, 2021 at 12:04 PM Craig Topper <craig.topper at gmail.com>
wrote:

> There's a typo in this line of X86InstrCompiler.td in llvm 10. It should
> be shiftMask64 not shiftMask32.
>
> // (shift x (and y, 63)) ==> (shift x, y)
> def : Pat<(frag GR64:$src1, GR64:$src2, (shiftMask32 CL)),
> (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
>
>
> ~Craig
>
>
> On Tue, Jan 26, 2021 at 11:57 AM Riyaz Puthiyapurayil via llvm-dev <
> llvm-dev at lists.llvm.org> wrote:
>
>> Curiously, for the following test, with -fsanitize=undefined -O3,
>> clang-10 reports no UB errors and produces the correct run-time result. But
>> without -fsanitize=undefined, it produces wrong output. Clang (trunk)
>> produces correct results either way. I think there is definitely a bug that
>> was fixed recently (in the last year or so). Could it be this one:
>>
>> https://reviews.llvm.org/D75748?id=
>>
>> Roman, you were a reviewer for this.
>>
>> -----Original Message-----
>> From: Riyaz Puthiyapurayil
>> Sent: Tuesday, January 26, 2021 11:48 AM
>> To: 'Roman Lebedev' <lebedev.ri at gmail.com>
>> Cc: llvm-dev at lists.llvm.org
>> Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with
>> an invalid shift count for `shrd`
>>
>> My bad! There was a typo that caused the UBSAN error.  Here is the
>> corrected test. This time UBSAN reports no errors. But clang-10 still
>> produces unexpected result whereas trunk does not.
>>
>> #include <stdio.h>
>> #include <stdint.h>
>> #include <string.h>
>>
>> __attribute__ ((noinline)) uint64_t  foo4(__int128_t* arr1) {
>>     unsigned chunk = 34 >> 5;
>>     unsigned bitIndex = 34 - chunk * 32;
>>
>>     unsigned* arr = (unsigned *)arr1 + chunk;
>>     __int128_t v1 = *arr1;
>>     v1 = v1 >> bitIndex;
>>
>>     return (uint64_t) v1;
>> }
>>
>> __attribute__ ((noinline)) uint64_t  foo3(__int128_t* arr1, unsigned
>> index) {
>>     unsigned chunk = index >> 5;
>>     unsigned bitIndex = index - chunk * 32;
>>
>>     unsigned *arr = (unsigned *)arr1 + chunk;
>>     __int128_t v1 = *arr1;
>>     v1 = v1 >> bitIndex;
>>
>>     return (uint64_t) v1;
>> }
>>
>> int main() {
>>
>>     unsigned arr[4];
>>
>>     arr[0]= 0x6f7cfd6f;
>>     arr[1]= 0xd96c9806;
>>     arr[2]= 0x89420144;
>>     arr[3]= 0x8a20f548;
>>
>>     __int128_t arr1;
>>
>>     memcpy(&arr1, arr, sizeof(arr1));
>>
>>     printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff);
>>     printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); }
>>
>> -----Original Message-----
>> From: Roman Lebedev <lebedev.ri at gmail.com>
>> Sent: Tuesday, January 26, 2021 11:31 AM
>> To: Riyaz Puthiyapurayil <riyaz at synopsys.com>
>> Cc: llvm-dev at lists.llvm.org
>> Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with
>> an invalid shift count for `shrd`
>>
>>
>> https://urldefense.com/v3/__https://godbolt.org/z/5bcsj9__;!!A4F2R9G_pg!P3wQMqAfQM_jzlQy2F-1LD2x1ekeAKGQckArJiV5UsQIR2NhtjHbH5ivGPqtfl8bZLzgtjq5KIG7$
>>
>> example.c:43:37: runtime error: subtraction of unsigned offset from
>> 0x000000000022 overflowed to 0x7fffd01217c8
>> SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior example.c:43:37 in
>> example.c:19:37: runtime error: applying non-zero offset
>> 140576899311424 to null pointer
>> SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior example.c:19:37 in
>> foo3 19bdf3f5b
>> foo4 19bdf3f5b
>>
>> On Tue, Jan 26, 2021 at 10:29 PM Riyaz Puthiyapurayil <
>> Riyaz.Puthiyapurayil at synopsys.com> wrote:
>> >
>> > Apologies, outlook messed up the formatting in the previous email. Here
>> is it again.
>> >
>> >
>> >
>> > Do you see any UB in the following modified test? Here also, I see a
>> difference between foo3 and foo4. Again, trunk and gcc match. But not
>> Clang-10.
>> >
>> >
>> >
>> > #include <stdio.h>
>> >
>> > #include <stdint.h>
>> >
>> > #include <string.h>
>> >
>> >
>> >
>> > __attribute__ ((noinline)) uint64_t  foo4(__int128_t* arr1)
>> >
>> > {
>> >
>> >     unsigned chunk = 34 >> 5;
>> >
>> >     unsigned bitIndex = 34 - chunk * 32;
>> >
>> >
>> >
>> >     unsigned* arr = (unsigned *)arr + chunk;
>> >
>> >     __int128_t v1 = *arr1;
>> >
>> >     v1 = v1 >> bitIndex;
>> >
>> >
>> >
>> >     return (uint64_t) v1;
>> >
>> > }
>> >
>> >
>> >
>> > __attribute__ ((noinline)) uint64_t  foo3(__int128_t* arr1, unsigned
>> > index)
>> >
>> > {
>> >
>> >     unsigned chunk = index >> 5;
>> >
>> >     unsigned bitIndex = index - chunk * 32;
>> >
>> >
>> >
>> >     unsigned *arr = (unsigned *)arr + chunk;
>> >
>> >     __int128_t v1 = *arr1;
>> >
>> >     v1 = v1 >> bitIndex;
>> >
>> >
>> >
>> >     return (uint64_t) v1;
>> >
>> > }
>> >
>> >
>> >
>> > int main() {
>> >
>> >
>> >
>> >     unsigned arr[4];
>> >
>> >
>> >
>> >     arr[0]= 0x6f7cfd6f;
>> >
>> >     arr[1]= 0xd96c9806;
>> >
>> >     arr[2]= 0x89420144;
>> >
>> >     arr[3]= 0x8a20f548;
>> >
>> >
>> >
>> >     __int128_t arr1;
>> >
>> >
>> >
>> >     memcpy(&arr1, arr, sizeof(arr1));
>> >
>> >
>> >
>> >     printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff);
>> >
>> >     printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff);
>> >
>> > }
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> > From: Riyaz Puthiyapurayil
>> > Sent: Tuesday, January 26, 2021 11:25 AM
>> > To: 'Roman Lebedev' <lebedev.ri at gmail.com>
>> > Cc: llvm-dev at lists.llvm.org
>> > Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64
>> > with an invalid shift count for `shrd`
>> >
>> >
>> >
>> > Do you see any UB in the following modified test? Here also, I see a
>> > difference between foo3 and foo4. Again, trunk and gcc match. But not
>> > Clang-10
>> >
>> >
>> >
>> > #include <stdio.h>
>> >
>> > #include <stdint.h>
>> >
>> > #include <string.h>
>> >
>> >
>> >
>> > __attribute__ ((noinline)) uint64_t  foo4(__int128_t* arr1)
>> >
>> > {
>> >
>> >     unsigned chunk = 34 >> 5;
>> >
>> >     unsigned bitIndex = 34 - chunk * 32;
>> >
>> >
>> >
>> >     unsigned* arr = (unsigned *)arr + chunk;
>> >
>> >     __int128_t v1 = *arr1;
>> >
>> >     v1 = v1 >> bitIndex;
>> >
>> >
>> >
>> >     return (uint64_t) v1;
>> >
>> > }
>> >
>> >
>> >
>> > __attribute__ ((noinline)) uint64_t  foo3(__int128_t* arr1, unsigned
>> > index)
>> >
>> > {
>> >
>> >     unsigned chunk = index >> 5;
>> >
>> >     unsigned bitIndex = index - chunk * 32;
>> >
>> >
>> >
>> >     unsigned *arr = (unsigned *)arr + chunk;
>> >
>> >     __int128_t v1 = *arr1;
>> >
>> >     v1 = v1 >> bitIndex;
>> >
>> >
>> >
>> >     return (uint64_t) v1;
>> >
>> > }
>> >
>> >
>> >
>> > int main() {
>> >
>> >
>> >
>> >     unsigned arr[4];
>> >
>> >
>> >
>> >     arr[0]= 0x6f7cfd6f;
>> >
>> >     arr[1]= 0xd96c9806;
>> >
>> >     arr[2]= 0x89420144;
>> >
>> >     arr[3]= 0x8a20f548;
>> >
>> >
>> >
>> >     __int128_t arr1;
>> >
>> >
>> >
>> >     memcpy(&arr1, arr, sizeof(arr1));
>> >
>> >
>> >
>> >     printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); // prints
>> > 1365b2601 (clang-10) 19bdf3f5b (gcc, clang-12)
>> >
>> >     printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff);    // prints
>> 19bdf3f5b
>> >
>> > }
>> >
>> >
>> >
>> > -----Original Message-----
>> > From: Riyaz Puthiyapurayil
>> > Sent: Tuesday, January 26, 2021 11:10 AM
>> > To: 'Roman Lebedev' <lebedev.ri at gmail.com>
>> > Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64
>> > with an invalid shift count for `shrd`
>> >
>> >
>> >
>> > You may be right about that the cast from unsigned* to __int128_t*
>> being UB. But I think that has nothing to do with the bug. That code can be
>> fixed to use a __int128_t array and change all the casts from __int128_t*
>> to unsigned* (instead of the other way around) and we would still have the
>> issue.
>> >
>> >
>> >
>> > -----Original Message-----
>> >
>> > From: Roman Lebedev <lebedev.ri at gmail.com>
>> >
>> > Sent: Tuesday, January 26, 2021 10:34 AM
>> >
>> > To: Riyaz Puthiyapurayil <riyaz at synopsys.com>
>> >
>> > Cc: llvm-dev at lists.llvm.org
>> >
>> > Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64
>> > with an invalid shift count for `shrd`
>> >
>> >
>> >
>> > On Tue, Jan 26, 2021 at 9:28 PM Riyaz Puthiyapurayil via llvm-dev <
>> llvm-dev at lists.llvm.org> wrote:
>> >
>> > >
>> >
>> > > It appears that LLVM 10 is miscompiling the following test on x86_64
>> but the trunk version seems to work fine. Before I spend any time debugging
>> this, it would be helpful if someone can point me to a bug fix if there was
>> one.
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > > #include <stdio.h>
>> >
>> > >
>> >
>> > > #include <stdint.h>
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > > __attribute__ ((noinline)) uint64_t  foo4(unsigned* arr)
>> >
>> > >
>> >
>> > > {
>> >
>> > >
>> >
>> > >     unsigned chunk = 34 >> 5;
>> >
>> > >
>> >
>> > >     unsigned bitIndex = 34 - chunk * 32;
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >     arr = arr + chunk;
>> >
>> > >
>> >
>> > >     __int128_t* arr1 = (__int128_t*) arr;
>> >
>> > I'm pretty sure this is UB, because the alignment of __int128_t is
>> bigger than that of unsigned.
>> >
>> >
>> >
>> > >
>> >
>> > >     __int128_t v1 = *arr1;
>> >
>> > >
>> >
>> > >     v1 = v1 >> bitIndex;
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >     return (uint64_t) v1;
>> >
>> > >
>> >
>> > > }
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > > __attribute__ ((noinline)) uint64_t  foo3(unsigned* arr, unsigned
>> >
>> > > index)
>> >
>> > >
>> >
>> > > {
>> >
>> > >
>> >
>> > >     unsigned chunk = index >> 5;
>> >
>> > >
>> >
>> > >     unsigned bitIndex = index - chunk * 32;
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >     arr = arr + chunk;
>> >
>> > >
>> >
>> > >     __int128_t* arr1 = (__int128_t*) arr;
>> >
>> > >
>> >
>> > >     __int128_t v1 = *arr1;
>> >
>> > >
>> >
>> > >     v1 = v1 >> bitIndex;
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >     return (uint64_t) v1;
>> >
>> > >
>> >
>> > > }
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > > int main() {
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >     unsigned arr[5];
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >     arr[0]= 0x6f7cfd6f;
>> >
>> > >
>> >
>> > >     arr[1]= 0xd96c9806;
>> >
>> > >
>> >
>> > >     arr[2]= 0x89420144;
>> >
>> > >
>> >
>> > >     arr[3]= 0x8a20f548;
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > >     printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints
>> >
>> > > 222508051 instead of 1365b2601
>> >
>> > >
>> >
>> > >     printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints
>> > > 1365b2601
>> >
>> > > as expected
>> >
>> > >
>> >
>> > > }
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > > If you look at the assembly, there is an ‘and cl, 31’ missing in foo3
>> which is needed to make sure that the shift count is in the range [0...31].
>> This is missing in the generated code. Note that if the count in CL
>> register is greater than 31, x86_64 manual states that the behavior of shrd
>> is undefined.
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > > # clang 10
>> >
>> > >
>> >
>> > > foo3(unsigned int*, unsigned int):                             #
>> @foo3(unsigned int*, unsigned int)
>> >
>> > >
>> >
>> > >         mov     ecx, esi
>> >
>> > >
>> >
>> > >         mov     edx, esi
>> >
>> > >
>> >
>> > >         shr     edx, 5
>> >
>> > >
>> >
>> > >         mov     rax, qword ptr [rdi + 4*rdx]
>> >
>> > >
>> >
>> > >         mov     rdx, qword ptr [rdi + 4*rdx + 8]
>> >
>> > >
>> >
>> > >         shrd    rax, rdx, cl
>> >
>> > >
>> >
>> > >         ret
>> >
>> > >
>> >
>> > >
>> >
>> > >
>> >
>> > > # Trunk
>> >
>> > >
>> >
>> > > foo3(unsigned int*, unsigned int):                             #
>> @foo3(unsigned int*, unsigned int)
>> >
>> > >
>> >
>> > >         mov     ecx, esi
>> >
>> > >
>> >
>> > >         mov     edx, esi
>> >
>> > >
>> >
>> > >         shr     edx, 5
>> >
>> > >
>> >
>> > >         mov     rax, qword ptr [rdi + 4*rdx]
>> >
>> > >
>> >
>> > >         mov     rdx, qword ptr [rdi + 4*rdx + 8]
>> >
>> > >
>> >
>> > >         and     cl, 31
>> >
>> > >
>> >
>> > >         shrd    rax, rdx, cl
>> >
>> > >
>> >
>> > >         ret
>> >
>> > >
>> >
>> >
>> >
>> > Roman.
>> >
>> >
>> >
>> > > _______________________________________________
>> >
>> > > LLVM Developers mailing list
>> >
>> > > llvm-dev at lists.llvm.org
>> >
>> > > https://urldefense.com/v3/__https://lists.llvm.org/cgi-bin/mailman/l
>> > > is
>> >
>> > > tinfo/llvm-dev__;!!A4F2R9G_pg!Mq1ZgXCY7kwDMwNwdn4Tcz-KgkoXMlP_rjq7_U
>> > > 9M
>> >
>> > > rh1gbrtqvDoSi6cMuSLZaXIPgtijPW55JJ_8$
>> _______________________________________________
>> LLVM Developers mailing list
>> llvm-dev at lists.llvm.org
>> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20210126/fe413b6b/attachment-0001.html>


More information about the llvm-dev mailing list