[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
Craig Topper via llvm-dev
llvm-dev at lists.llvm.org
Tue Jan 26 12:04:28 PST 2021
There's a typo in this line of X86InstrCompiler.td in llvm 10. It should be
shiftMask64 not shiftMask32.
// (shift x (and y, 63)) ==> (shift x, y)
def : Pat<(frag GR64:$src1, GR64:$src2, (shiftMask32 CL)),
(!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
~Craig
On Tue, Jan 26, 2021 at 11:57 AM Riyaz Puthiyapurayil via llvm-dev <
llvm-dev at lists.llvm.org> wrote:
> Curiously, for the following test, with -fsanitize=undefined -O3, clang-10
> reports no UB errors and produces the correct run-time result. But without
> -fsanitize=undefined, it produces wrong output. Clang (trunk) produces
> correct results either way. I think there is definitely a bug that was
> fixed recently (in the last year or so). Could it be this one:
>
> https://reviews.llvm.org/D75748?id=
>
> Roman, you were a reviewer for this.
>
> -----Original Message-----
> From: Riyaz Puthiyapurayil
> Sent: Tuesday, January 26, 2021 11:48 AM
> To: 'Roman Lebedev' <lebedev.ri at gmail.com>
> Cc: llvm-dev at lists.llvm.org
> Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with
> an invalid shift count for `shrd`
>
> My bad! There was a typo that caused the UBSAN error. Here is the
> corrected test. This time UBSAN reports no errors. But clang-10 still
> produces unexpected result whereas trunk does not.
>
> #include <stdio.h>
> #include <stdint.h>
> #include <string.h>
>
> __attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1) {
> unsigned chunk = 34 >> 5;
> unsigned bitIndex = 34 - chunk * 32;
>
> unsigned* arr = (unsigned *)arr1 + chunk;
> __int128_t v1 = *arr1;
> v1 = v1 >> bitIndex;
>
> return (uint64_t) v1;
> }
>
> __attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned
> index) {
> unsigned chunk = index >> 5;
> unsigned bitIndex = index - chunk * 32;
>
> unsigned *arr = (unsigned *)arr1 + chunk;
> __int128_t v1 = *arr1;
> v1 = v1 >> bitIndex;
>
> return (uint64_t) v1;
> }
>
> int main() {
>
> unsigned arr[4];
>
> arr[0]= 0x6f7cfd6f;
> arr[1]= 0xd96c9806;
> arr[2]= 0x89420144;
> arr[3]= 0x8a20f548;
>
> __int128_t arr1;
>
> memcpy(&arr1, arr, sizeof(arr1));
>
> printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff);
> printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); }
>
> -----Original Message-----
> From: Roman Lebedev <lebedev.ri at gmail.com>
> Sent: Tuesday, January 26, 2021 11:31 AM
> To: Riyaz Puthiyapurayil <riyaz at synopsys.com>
> Cc: llvm-dev at lists.llvm.org
> Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with
> an invalid shift count for `shrd`
>
>
> https://urldefense.com/v3/__https://godbolt.org/z/5bcsj9__;!!A4F2R9G_pg!P3wQMqAfQM_jzlQy2F-1LD2x1ekeAKGQckArJiV5UsQIR2NhtjHbH5ivGPqtfl8bZLzgtjq5KIG7$
>
> example.c:43:37: runtime error: subtraction of unsigned offset from
> 0x000000000022 overflowed to 0x7fffd01217c8
> SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior example.c:43:37 in
> example.c:19:37: runtime error: applying non-zero offset
> 140576899311424 to null pointer
> SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior example.c:19:37 in
> foo3 19bdf3f5b
> foo4 19bdf3f5b
>
> On Tue, Jan 26, 2021 at 10:29 PM Riyaz Puthiyapurayil <
> Riyaz.Puthiyapurayil at synopsys.com> wrote:
> >
> > Apologies, outlook messed up the formatting in the previous email. Here
> is it again.
> >
> >
> >
> > Do you see any UB in the following modified test? Here also, I see a
> difference between foo3 and foo4. Again, trunk and gcc match. But not
> Clang-10.
> >
> >
> >
> > #include <stdio.h>
> >
> > #include <stdint.h>
> >
> > #include <string.h>
> >
> >
> >
> > __attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1)
> >
> > {
> >
> > unsigned chunk = 34 >> 5;
> >
> > unsigned bitIndex = 34 - chunk * 32;
> >
> >
> >
> > unsigned* arr = (unsigned *)arr + chunk;
> >
> > __int128_t v1 = *arr1;
> >
> > v1 = v1 >> bitIndex;
> >
> >
> >
> > return (uint64_t) v1;
> >
> > }
> >
> >
> >
> > __attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned
> > index)
> >
> > {
> >
> > unsigned chunk = index >> 5;
> >
> > unsigned bitIndex = index - chunk * 32;
> >
> >
> >
> > unsigned *arr = (unsigned *)arr + chunk;
> >
> > __int128_t v1 = *arr1;
> >
> > v1 = v1 >> bitIndex;
> >
> >
> >
> > return (uint64_t) v1;
> >
> > }
> >
> >
> >
> > int main() {
> >
> >
> >
> > unsigned arr[4];
> >
> >
> >
> > arr[0]= 0x6f7cfd6f;
> >
> > arr[1]= 0xd96c9806;
> >
> > arr[2]= 0x89420144;
> >
> > arr[3]= 0x8a20f548;
> >
> >
> >
> > __int128_t arr1;
> >
> >
> >
> > memcpy(&arr1, arr, sizeof(arr1));
> >
> >
> >
> > printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff);
> >
> > printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff);
> >
> > }
> >
> >
> >
> >
> >
> >
> >
> > From: Riyaz Puthiyapurayil
> > Sent: Tuesday, January 26, 2021 11:25 AM
> > To: 'Roman Lebedev' <lebedev.ri at gmail.com>
> > Cc: llvm-dev at lists.llvm.org
> > Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64
> > with an invalid shift count for `shrd`
> >
> >
> >
> > Do you see any UB in the following modified test? Here also, I see a
> > difference between foo3 and foo4. Again, trunk and gcc match. But not
> > Clang-10
> >
> >
> >
> > #include <stdio.h>
> >
> > #include <stdint.h>
> >
> > #include <string.h>
> >
> >
> >
> > __attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1)
> >
> > {
> >
> > unsigned chunk = 34 >> 5;
> >
> > unsigned bitIndex = 34 - chunk * 32;
> >
> >
> >
> > unsigned* arr = (unsigned *)arr + chunk;
> >
> > __int128_t v1 = *arr1;
> >
> > v1 = v1 >> bitIndex;
> >
> >
> >
> > return (uint64_t) v1;
> >
> > }
> >
> >
> >
> > __attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned
> > index)
> >
> > {
> >
> > unsigned chunk = index >> 5;
> >
> > unsigned bitIndex = index - chunk * 32;
> >
> >
> >
> > unsigned *arr = (unsigned *)arr + chunk;
> >
> > __int128_t v1 = *arr1;
> >
> > v1 = v1 >> bitIndex;
> >
> >
> >
> > return (uint64_t) v1;
> >
> > }
> >
> >
> >
> > int main() {
> >
> >
> >
> > unsigned arr[4];
> >
> >
> >
> > arr[0]= 0x6f7cfd6f;
> >
> > arr[1]= 0xd96c9806;
> >
> > arr[2]= 0x89420144;
> >
> > arr[3]= 0x8a20f548;
> >
> >
> >
> > __int128_t arr1;
> >
> >
> >
> > memcpy(&arr1, arr, sizeof(arr1));
> >
> >
> >
> > printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); // prints
> > 1365b2601 (clang-10) 19bdf3f5b (gcc, clang-12)
> >
> > printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); // prints
> 19bdf3f5b
> >
> > }
> >
> >
> >
> > -----Original Message-----
> > From: Riyaz Puthiyapurayil
> > Sent: Tuesday, January 26, 2021 11:10 AM
> > To: 'Roman Lebedev' <lebedev.ri at gmail.com>
> > Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64
> > with an invalid shift count for `shrd`
> >
> >
> >
> > You may be right about that the cast from unsigned* to __int128_t* being
> UB. But I think that has nothing to do with the bug. That code can be fixed
> to use a __int128_t array and change all the casts from __int128_t* to
> unsigned* (instead of the other way around) and we would still have the
> issue.
> >
> >
> >
> > -----Original Message-----
> >
> > From: Roman Lebedev <lebedev.ri at gmail.com>
> >
> > Sent: Tuesday, January 26, 2021 10:34 AM
> >
> > To: Riyaz Puthiyapurayil <riyaz at synopsys.com>
> >
> > Cc: llvm-dev at lists.llvm.org
> >
> > Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64
> > with an invalid shift count for `shrd`
> >
> >
> >
> > On Tue, Jan 26, 2021 at 9:28 PM Riyaz Puthiyapurayil via llvm-dev <
> llvm-dev at lists.llvm.org> wrote:
> >
> > >
> >
> > > It appears that LLVM 10 is miscompiling the following test on x86_64
> but the trunk version seems to work fine. Before I spend any time debugging
> this, it would be helpful if someone can point me to a bug fix if there was
> one.
> >
> > >
> >
> > >
> >
> > >
> >
> > > #include <stdio.h>
> >
> > >
> >
> > > #include <stdint.h>
> >
> > >
> >
> > >
> >
> > >
> >
> > > __attribute__ ((noinline)) uint64_t foo4(unsigned* arr)
> >
> > >
> >
> > > {
> >
> > >
> >
> > > unsigned chunk = 34 >> 5;
> >
> > >
> >
> > > unsigned bitIndex = 34 - chunk * 32;
> >
> > >
> >
> > >
> >
> > >
> >
> > > arr = arr + chunk;
> >
> > >
> >
> > > __int128_t* arr1 = (__int128_t*) arr;
> >
> > I'm pretty sure this is UB, because the alignment of __int128_t is
> bigger than that of unsigned.
> >
> >
> >
> > >
> >
> > > __int128_t v1 = *arr1;
> >
> > >
> >
> > > v1 = v1 >> bitIndex;
> >
> > >
> >
> > >
> >
> > >
> >
> > > return (uint64_t) v1;
> >
> > >
> >
> > > }
> >
> > >
> >
> > >
> >
> > >
> >
> > > __attribute__ ((noinline)) uint64_t foo3(unsigned* arr, unsigned
> >
> > > index)
> >
> > >
> >
> > > {
> >
> > >
> >
> > > unsigned chunk = index >> 5;
> >
> > >
> >
> > > unsigned bitIndex = index - chunk * 32;
> >
> > >
> >
> > >
> >
> > >
> >
> > > arr = arr + chunk;
> >
> > >
> >
> > > __int128_t* arr1 = (__int128_t*) arr;
> >
> > >
> >
> > > __int128_t v1 = *arr1;
> >
> > >
> >
> > > v1 = v1 >> bitIndex;
> >
> > >
> >
> > >
> >
> > >
> >
> > > return (uint64_t) v1;
> >
> > >
> >
> > > }
> >
> > >
> >
> > >
> >
> > >
> >
> > >
> >
> > >
> >
> > > int main() {
> >
> > >
> >
> > >
> >
> > >
> >
> > > unsigned arr[5];
> >
> > >
> >
> > >
> >
> > >
> >
> > > arr[0]= 0x6f7cfd6f;
> >
> > >
> >
> > > arr[1]= 0xd96c9806;
> >
> > >
> >
> > > arr[2]= 0x89420144;
> >
> > >
> >
> > > arr[3]= 0x8a20f548;
> >
> > >
> >
> > >
> >
> > >
> >
> > > printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints
> >
> > > 222508051 instead of 1365b2601
> >
> > >
> >
> > > printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints
> > > 1365b2601
> >
> > > as expected
> >
> > >
> >
> > > }
> >
> > >
> >
> > >
> >
> > >
> >
> > > If you look at the assembly, there is an ‘and cl, 31’ missing in foo3
> which is needed to make sure that the shift count is in the range [0...31].
> This is missing in the generated code. Note that if the count in CL
> register is greater than 31, x86_64 manual states that the behavior of shrd
> is undefined.
> >
> > >
> >
> > >
> >
> > >
> >
> > > # clang 10
> >
> > >
> >
> > > foo3(unsigned int*, unsigned int): #
> @foo3(unsigned int*, unsigned int)
> >
> > >
> >
> > > mov ecx, esi
> >
> > >
> >
> > > mov edx, esi
> >
> > >
> >
> > > shr edx, 5
> >
> > >
> >
> > > mov rax, qword ptr [rdi + 4*rdx]
> >
> > >
> >
> > > mov rdx, qword ptr [rdi + 4*rdx + 8]
> >
> > >
> >
> > > shrd rax, rdx, cl
> >
> > >
> >
> > > ret
> >
> > >
> >
> > >
> >
> > >
> >
> > > # Trunk
> >
> > >
> >
> > > foo3(unsigned int*, unsigned int): #
> @foo3(unsigned int*, unsigned int)
> >
> > >
> >
> > > mov ecx, esi
> >
> > >
> >
> > > mov edx, esi
> >
> > >
> >
> > > shr edx, 5
> >
> > >
> >
> > > mov rax, qword ptr [rdi + 4*rdx]
> >
> > >
> >
> > > mov rdx, qword ptr [rdi + 4*rdx + 8]
> >
> > >
> >
> > > and cl, 31
> >
> > >
> >
> > > shrd rax, rdx, cl
> >
> > >
> >
> > > ret
> >
> > >
> >
> >
> >
> > Roman.
> >
> >
> >
> > > _______________________________________________
> >
> > > LLVM Developers mailing list
> >
> > > llvm-dev at lists.llvm.org
> >
> > > https://urldefense.com/v3/__https://lists.llvm.org/cgi-bin/mailman/l
> > > is
> >
> > > tinfo/llvm-dev__;!!A4F2R9G_pg!Mq1ZgXCY7kwDMwNwdn4Tcz-KgkoXMlP_rjq7_U
> > > 9M
> >
> > > rh1gbrtqvDoSi6cMuSLZaXIPgtijPW55JJ_8$
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20210126/36dc9946/attachment.html>
More information about the llvm-dev
mailing list