[PATCH] D44785: Lowering x86 adds/addus/subs/subus intrinsics (llvm part)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 20 09:55:51 PDT 2018


Can you attach the .ll file for llc?

~Craig

On Fri, Apr 20, 2018 at 9:54 AM, Craig Topper <craig.topper at gmail.com>
wrote:

> Is the address of the call here supposed to be 0?
>
>   245:         movabsq $0, %rax
>   255:         callq   *%rax
>
> ~Craig
>
> On Fri, Apr 20, 2018 at 8:54 AM, Tom Hudson via Phabricator <
> reviews at reviews.llvm.org> wrote:
>
>> tomhudson added a comment.
>>
>> The simplest example I have of what's breaking is the llvmpipe test code;
>> src/gallium/drivers/llvmpipe/lp_test_blend.c from
>> https://cgit.freedesktop.org/mesa/mesa/. Both 17.0.3 and 18.0.0 break.
>>
>> llvmpipe was explicitly issuing sse2.psubs and sse2.padds in
>> src/gallium/auxiliary/gallivm/lp_bld_arit.c; that produced working code
>> before this patch, but with this patch it crashes immediately. If we stop
>> issuing the sse2 padds/psubs, our test case works again.
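>>
>> Not the .ll attachment requested above, but here is a minimal hand-written
>> sketch of the kind of IR gallivm was emitting through those intrinsics (the
>> function name, types, and operands are illustrative, not taken from
>> lp_test_blend.c):
>>
>>     ; old SSE2 saturating signed-byte add/subtract intrinsics
>>     declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>)
>>     declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>)
>>
>>     define <16 x i8> @sat_blend(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
>>       ; saturating add, then saturating subtract, standing in for the
>>       ; blend math done in lp_bld_arit.c
>>       %add = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
>>       %sub = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %add, <16 x i8> %c)
>>       ret <16 x i8> %sub
>>     }
>>
>> Running llc -mcpu=haswell on a file like that should show how such calls are
>> lowered before and after the patch.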
>>
>> Debug output looks like this:
>>
>> llc -mattr option(s): +sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,
>>   -vpclmulqdq,-prfchw,+bmi2,-cldemote,+fsgsbase,-xsavec,+popcnt,+aes,
>>   -avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,
>>   -avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,-rdseed,-ibt,-sse4a,
>>   -avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,
>>   -vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,
>>   +sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,
>>   -avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3
>> llc -mcpu option: haswell
>>
>> test:
>>
>>     0:         pushq   %rbp
>>     1:         movq    %rsp, %rbp
>>     4:         pushq   %rbx
>>     5:         subq    $40, %rsp
>>     9:         movq    %r8, %rbx
>>    12:         vmovdqa (%rdi), %xmm0
>>    16:         vmovdqa (%rdx), %xmm1
>>    20:         movabsq $140737174016000, %rax
>>    30:         vpand   (%rax), %xmm0, %xmm2
>>    34:         vpsrld  $8, %xmm2, %xmm3
>>    39:         vpor    %xmm2, %xmm3, %xmm2
>>    43:         vpcmpeqd        %xmm3, %xmm3, %xmm3
>>    47:         vpxor   %xmm3, %xmm0, %xmm3
>>    51:         movabsq $140737174016032, %rax
>>    61:         vpbroadcastd    (%rax), %xmm4
>>    66:         vmovdqa %xmm4, -48(%rbp)
>>    71:         vpblendvb       %xmm4, (%rsi), %xmm3, %xmm3
>>    77:         vpsrld  $16, %xmm2, %xmm4
>>    82:         vpor    %xmm2, %xmm4, %xmm2
>>    86:         vpmovzxbw       %xmm0, %xmm4
>>    91:         vpxor   %xmm5, %xmm5, %xmm5
>>    95:         vpunpckhbw      %xmm5, %xmm0, %xmm0
>>    99:         vpmovzxbw       %xmm2, %xmm6
>>   104:         vpmullw %xmm4, %xmm6, %xmm4
>>   108:         vpunpckhbw      %xmm5, %xmm2, %xmm2
>>   112:         vpmullw %xmm0, %xmm2, %xmm0
>>   116:         vpsrlw  $8, %xmm4, %xmm2
>>   121:         movabsq $140737174016016, %rax
>>   131:         vmovdqa (%rax), %xmm6
>>   135:         vpaddw  %xmm6, %xmm4, %xmm4
>>   139:         vpaddw  %xmm4, %xmm2, %xmm2
>>   143:         vpsrlw  $8, %xmm2, %xmm2
>>   148:         vpsrlw  $8, %xmm0, %xmm4
>>   153:         vpaddw  %xmm6, %xmm0, %xmm0
>>   157:         vpaddw  %xmm0, %xmm4, %xmm0
>>   161:         vpsrlw  $8, %xmm0, %xmm0
>>   166:         vpackuswb       %xmm0, %xmm2, %xmm0
>>   170:         vpmovzxbw       %xmm1, %xmm2
>>   175:         vpunpckhbw      %xmm5, %xmm1, %xmm1
>>   179:         vpmovzxbw       %xmm3, %xmm4
>>   184:         vpmullw %xmm4, %xmm2, %xmm2
>>   188:         vpunpckhbw      %xmm5, %xmm3, %xmm3
>>   192:         vpmullw %xmm3, %xmm1, %xmm1
>>   196:         vpsrlw  $8, %xmm2, %xmm3
>>   201:         vpaddw  %xmm6, %xmm2, %xmm2
>>   205:         vpaddw  %xmm2, %xmm3, %xmm2
>>   209:         vpsrlw  $8, %xmm2, %xmm2
>>   214:         vpsrlw  $8, %xmm1, %xmm3
>>   219:         vpaddw  %xmm6, %xmm1, %xmm1
>>   223:         vpaddw  %xmm1, %xmm3, %xmm1
>>   227:         vpsrlw  $8, %xmm1, %xmm1
>>   232:         vpackuswb       %xmm1, %xmm2, %xmm1
>>   236:         vpminub %xmm1, %xmm0, %xmm2
>>   240:         vmovdqa %xmm2, -32(%rbp)
>>   245:         movabsq $0, %rax
>>   255:         callq   *%rax
>>   257:         vmovdqa -48(%rbp), %xmm1
>>   262:         vpblendvb       %xmm1, -32(%rbp), %xmm0, %xmm0
>>   269:         vmovdqa %xmm0, (%rbx)
>>   273:         addq    $40, %rsp
>>   277:         popq    %rbx
>>   278:         popq    %rbp
>>   279:         retq
>>
>> After we return from the callq, we seem to have a corrupt stack.
>>
>>
>> Repository:
>>   rL LLVM
>>
>> https://reviews.llvm.org/D44785
>>
>