[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`

Riyaz Puthiyapurayil via llvm-dev llvm-dev at lists.llvm.org
Tue Jan 26 11:57:00 PST 2021


Curiously, for the following test, with -fsanitize=undefined -O3, clang-10 reports no UB errors and produces the correct run-time result. But without -fsanitize=undefined, it produces wrong output. Clang (trunk) produces correct results either way. I think there is definitely a bug that was fixed recently (in the last year or so). Could it be this one:

https://reviews.llvm.org/D75748?id=

Roman, you were a reviewer for this.

-----Original Message-----
From: Riyaz Puthiyapurayil 
Sent: Tuesday, January 26, 2021 11:48 AM
To: 'Roman Lebedev' <lebedev.ri at gmail.com>
Cc: llvm-dev at lists.llvm.org
Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`

My bad! There was a typo that caused the UBSAN error.  Here is the corrected test. This time UBSAN reports no errors. But clang-10 still produces unexpected result whereas trunk does not.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

__attribute__ ((noinline)) uint64_t  foo4(__int128_t* arr1) {
    unsigned chunk = 34 >> 5;
    unsigned bitIndex = 34 - chunk * 32;

    unsigned* arr = (unsigned *)arr1 + chunk;
    __int128_t v1 = *arr1;
    v1 = v1 >> bitIndex;

    return (uint64_t) v1;
}

__attribute__ ((noinline)) uint64_t  foo3(__int128_t* arr1, unsigned index) {
    unsigned chunk = index >> 5;
    unsigned bitIndex = index - chunk * 32;

    unsigned *arr = (unsigned *)arr1 + chunk;
    __int128_t v1 = *arr1;
    v1 = v1 >> bitIndex;

    return (uint64_t) v1;
}

int main() {

    unsigned arr[4];

    arr[0]= 0x6f7cfd6f;
    arr[1]= 0xd96c9806;
    arr[2]= 0x89420144;
    arr[3]= 0x8a20f548;

    __int128_t arr1;

    memcpy(&arr1, arr, sizeof(arr1));

    printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff);   
    printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); }

-----Original Message-----
From: Roman Lebedev <lebedev.ri at gmail.com>
Sent: Tuesday, January 26, 2021 11:31 AM
To: Riyaz Puthiyapurayil <riyaz at synopsys.com>
Cc: llvm-dev at lists.llvm.org
Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`

https://urldefense.com/v3/__https://godbolt.org/z/5bcsj9__;!!A4F2R9G_pg!P3wQMqAfQM_jzlQy2F-1LD2x1ekeAKGQckArJiV5UsQIR2NhtjHbH5ivGPqtfl8bZLzgtjq5KIG7$ 

example.c:43:37: runtime error: subtraction of unsigned offset from
0x000000000022 overflowed to 0x7fffd01217c8
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior example.c:43:37 in
example.c:19:37: runtime error: applying non-zero offset
140576899311424 to null pointer
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior example.c:19:37 in
foo3 19bdf3f5b
foo4 19bdf3f5b

On Tue, Jan 26, 2021 at 10:29 PM Riyaz Puthiyapurayil <Riyaz.Puthiyapurayil at synopsys.com> wrote:
>
> Apologies, outlook messed up the formatting in the previous email. Here is it again.
>
>
>
> Do you see any UB in the following modified test? Here also, I see a difference between foo3 and foo4. Again, trunk and gcc match. But not Clang-10.
>
>
>
> #include <stdio.h>
>
> #include <stdint.h>
>
> #include <string.h>
>
>
>
> __attribute__ ((noinline)) uint64_t  foo4(__int128_t* arr1)
>
> {
>
>     unsigned chunk = 34 >> 5;
>
>     unsigned bitIndex = 34 - chunk * 32;
>
>
>
>     unsigned* arr = (unsigned *)arr + chunk;
>
>     __int128_t v1 = *arr1;
>
>     v1 = v1 >> bitIndex;
>
>
>
>     return (uint64_t) v1;
>
> }
>
>
>
> __attribute__ ((noinline)) uint64_t  foo3(__int128_t* arr1, unsigned
> index)
>
> {
>
>     unsigned chunk = index >> 5;
>
>     unsigned bitIndex = index - chunk * 32;
>
>
>
>     unsigned *arr = (unsigned *)arr + chunk;
>
>     __int128_t v1 = *arr1;
>
>     v1 = v1 >> bitIndex;
>
>
>
>     return (uint64_t) v1;
>
> }
>
>
>
> int main() {
>
>
>
>     unsigned arr[4];
>
>
>
>     arr[0]= 0x6f7cfd6f;
>
>     arr[1]= 0xd96c9806;
>
>     arr[2]= 0x89420144;
>
>     arr[3]= 0x8a20f548;
>
>
>
>     __int128_t arr1;
>
>
>
>     memcpy(&arr1, arr, sizeof(arr1));
>
>
>
>     printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff);
>
>     printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff);
>
> }
>
>
>
>
>
>
>
> From: Riyaz Puthiyapurayil
> Sent: Tuesday, January 26, 2021 11:25 AM
> To: 'Roman Lebedev' <lebedev.ri at gmail.com>
> Cc: llvm-dev at lists.llvm.org
> Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 
> with an invalid shift count for `shrd`
>
>
>
> Do you see any UB in the following modified test? Here also, I see a 
> difference between foo3 and foo4. Again, trunk and gcc match. But not
> Clang-10
>
>
>
> #include <stdio.h>
>
> #include <stdint.h>
>
> #include <string.h>
>
>
>
> __attribute__ ((noinline)) uint64_t  foo4(__int128_t* arr1)
>
> {
>
>     unsigned chunk = 34 >> 5;
>
>     unsigned bitIndex = 34 - chunk * 32;
>
>
>
>     unsigned* arr = (unsigned *)arr + chunk;
>
>     __int128_t v1 = *arr1;
>
>     v1 = v1 >> bitIndex;
>
>
>
>     return (uint64_t) v1;
>
> }
>
>
>
> __attribute__ ((noinline)) uint64_t  foo3(__int128_t* arr1, unsigned
> index)
>
> {
>
>     unsigned chunk = index >> 5;
>
>     unsigned bitIndex = index - chunk * 32;
>
>
>
>     unsigned *arr = (unsigned *)arr + chunk;
>
>     __int128_t v1 = *arr1;
>
>     v1 = v1 >> bitIndex;
>
>
>
>     return (uint64_t) v1;
>
> }
>
>
>
> int main() {
>
>
>
>     unsigned arr[4];
>
>
>
>     arr[0]= 0x6f7cfd6f;
>
>     arr[1]= 0xd96c9806;
>
>     arr[2]= 0x89420144;
>
>     arr[3]= 0x8a20f548;
>
>
>
>     __int128_t arr1;
>
>
>
>     memcpy(&arr1, arr, sizeof(arr1));
>
>
>
>     printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); // prints
> 1365b2601 (clang-10) 19bdf3f5b (gcc, clang-12)
>
>     printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff);    // prints 19bdf3f5b
>
> }
>
>
>
> -----Original Message-----
> From: Riyaz Puthiyapurayil
> Sent: Tuesday, January 26, 2021 11:10 AM
> To: 'Roman Lebedev' <lebedev.ri at gmail.com>
> Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 
> with an invalid shift count for `shrd`
>
>
>
> You may be right about that the cast from unsigned* to __int128_t* being UB. But I think that has nothing to do with the bug. That code can be fixed to use a __int128_t array and change all the casts from __int128_t* to unsigned* (instead of the other way around) and we would still have the issue.
>
>
>
> -----Original Message-----
>
> From: Roman Lebedev <lebedev.ri at gmail.com>
>
> Sent: Tuesday, January 26, 2021 10:34 AM
>
> To: Riyaz Puthiyapurayil <riyaz at synopsys.com>
>
> Cc: llvm-dev at lists.llvm.org
>
> Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 
> with an invalid shift count for `shrd`
>
>
>
> On Tue, Jan 26, 2021 at 9:28 PM Riyaz Puthiyapurayil via llvm-dev <llvm-dev at lists.llvm.org> wrote:
>
> >
>
> > It appears that LLVM 10 is miscompiling the following test on x86_64 but the trunk version seems to work fine. Before I spend any time debugging this, it would be helpful if someone can point me to a bug fix if there was one.
>
> >
>
> >
>
> >
>
> > #include <stdio.h>
>
> >
>
> > #include <stdint.h>
>
> >
>
> >
>
> >
>
> > __attribute__ ((noinline)) uint64_t  foo4(unsigned* arr)
>
> >
>
> > {
>
> >
>
> >     unsigned chunk = 34 >> 5;
>
> >
>
> >     unsigned bitIndex = 34 - chunk * 32;
>
> >
>
> >
>
> >
>
> >     arr = arr + chunk;
>
> >
>
> >     __int128_t* arr1 = (__int128_t*) arr;
>
> I'm pretty sure this is UB, because the alignment of __int128_t is bigger than that of unsigned.
>
>
>
> >
>
> >     __int128_t v1 = *arr1;
>
> >
>
> >     v1 = v1 >> bitIndex;
>
> >
>
> >
>
> >
>
> >     return (uint64_t) v1;
>
> >
>
> > }
>
> >
>
> >
>
> >
>
> > __attribute__ ((noinline)) uint64_t  foo3(unsigned* arr, unsigned
>
> > index)
>
> >
>
> > {
>
> >
>
> >     unsigned chunk = index >> 5;
>
> >
>
> >     unsigned bitIndex = index - chunk * 32;
>
> >
>
> >
>
> >
>
> >     arr = arr + chunk;
>
> >
>
> >     __int128_t* arr1 = (__int128_t*) arr;
>
> >
>
> >     __int128_t v1 = *arr1;
>
> >
>
> >     v1 = v1 >> bitIndex;
>
> >
>
> >
>
> >
>
> >     return (uint64_t) v1;
>
> >
>
> > }
>
> >
>
> >
>
> >
>
> >
>
> >
>
> > int main() {
>
> >
>
> >
>
> >
>
> >     unsigned arr[5];
>
> >
>
> >
>
> >
>
> >     arr[0]= 0x6f7cfd6f;
>
> >
>
> >     arr[1]= 0xd96c9806;
>
> >
>
> >     arr[2]= 0x89420144;
>
> >
>
> >     arr[3]= 0x8a20f548;
>
> >
>
> >
>
> >
>
> >     printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints
>
> > 222508051 instead of 1365b2601
>
> >
>
> >     printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints
> > 1365b2601
>
> > as expected
>
> >
>
> > }
>
> >
>
> >
>
> >
>
> > If you look at the assembly, there is an ‘and cl, 31’ missing in foo3 which is needed to make sure that the shift count is in the range [0...31]. This is missing in the generated code. Note that if the count in CL register is greater than 31, x86_64 manual states that the behavior of shrd is undefined.
>
> >
>
> >
>
> >
>
> > # clang 10
>
> >
>
> > foo3(unsigned int*, unsigned int):                             # @foo3(unsigned int*, unsigned int)
>
> >
>
> >         mov     ecx, esi
>
> >
>
> >         mov     edx, esi
>
> >
>
> >         shr     edx, 5
>
> >
>
> >         mov     rax, qword ptr [rdi + 4*rdx]
>
> >
>
> >         mov     rdx, qword ptr [rdi + 4*rdx + 8]
>
> >
>
> >         shrd    rax, rdx, cl
>
> >
>
> >         ret
>
> >
>
> >
>
> >
>
> > # Trunk
>
> >
>
> > foo3(unsigned int*, unsigned int):                             # @foo3(unsigned int*, unsigned int)
>
> >
>
> >         mov     ecx, esi
>
> >
>
> >         mov     edx, esi
>
> >
>
> >         shr     edx, 5
>
> >
>
> >         mov     rax, qword ptr [rdi + 4*rdx]
>
> >
>
> >         mov     rdx, qword ptr [rdi + 4*rdx + 8]
>
> >
>
> >         and     cl, 31
>
> >
>
> >         shrd    rax, rdx, cl
>
> >
>
> >         ret
>
> >
>
>
>
> Roman.
>
>
>
> > _______________________________________________
>
> > LLVM Developers mailing list
>
> > llvm-dev at lists.llvm.org
>
> > https://urldefense.com/v3/__https://lists.llvm.org/cgi-bin/mailman/l
> > is
>
> > tinfo/llvm-dev__;!!A4F2R9G_pg!Mq1ZgXCY7kwDMwNwdn4Tcz-KgkoXMlP_rjq7_U
> > 9M
>
> > rh1gbrtqvDoSi6cMuSLZaXIPgtijPW55JJ_8$


More information about the llvm-dev mailing list