[PATCH] D44785: Lowering x86 adds/addus/subs/subus intrinsics (llvm part)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 20 09:54:49 PDT 2018


Is the address of the call here supposed to be 0?

  245:         movabsq $0, %rax
  255:         callq   *%rax

~Craig

On Fri, Apr 20, 2018 at 8:54 AM, Tom Hudson via Phabricator <reviews at reviews.llvm.org> wrote:

> tomhudson added a comment.
>
> The simplest example I have of what's breaking is the llvmpipe test code:
> src/gallium/drivers/llvmpipe/lp_test_blend.c from
> https://cgit.freedesktop.org/mesa/mesa/. Both Mesa 17.0.3 and 18.0.0 break.
>
> llvmpipe was explicitly issuing sse2.psubs and sse2.padds intrinsics in
> src/gallium/auxiliary/gallivm/lp_bld_arit.c; that produced working code
> before this patch, but with this patch applied it crashes immediately. If we
> stop issuing the sse2 padds/psubs there, our test case works again.
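>
> For illustration, the call being built looks roughly like the sketch below
> (hand-written for this report, not the actual lp_bld_arit.c code; the
> <16 x i8> types and the helper name are just assumptions for the example),
> using the LLVM-C API that gallivm is built on:
>
>   /* Illustrative sketch only -- not the real lp_bld_arit.c code. */
>   #include <llvm-c/Core.h>
>
>   static LLVMValueRef
>   build_padds_b(LLVMModuleRef module, LLVMBuilderRef builder,
>                 LLVMValueRef a, LLVMValueRef b)
>   {
>      LLVMTypeRef v16i8 = LLVMVectorType(LLVMInt8Type(), 16);
>      LLVMTypeRef arg_types[2] = { v16i8, v16i8 };
>      const char *name = "llvm.x86.sse2.padds.b";
>
>      /* Declare the intrinsic if it is not already in the module. */
>      LLVMValueRef intr = LLVMGetNamedFunction(module, name);
>      if (!intr) {
>         LLVMTypeRef fn_type = LLVMFunctionType(v16i8, arg_types, 2, 0);
>         intr = LLVMAddFunction(module, name, fn_type);
>      }
>
>      LLVMValueRef args[2] = { a, b };
>      return LLVMBuildCall(builder, intr, args, 2, "padds");
>   }
>
> The psubs case is the same shape with llvm.x86.sse2.psubs.b.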
>
> Debug output looks like this:
>
> llc -mattr option(s): +sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,
> -vpclmulqdq,-prfchw,+bmi2,-cldemote,+fsgsbase,-xsavec,+popcnt,+aes,
> -avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,
> -clzero,-pku,+mmx,-lwp,-rdpid,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,
> +xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,
> -mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,
> +f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,
> -avx512pf,+sse3
> llc -mcpu option: haswell
>
> test:
>
>     0:         pushq   %rbp
>     1:         movq    %rsp, %rbp
>     4:         pushq   %rbx
>     5:         subq    $40, %rsp
>     9:         movq    %r8, %rbx
>    12:         vmovdqa (%rdi), %xmm0
>    16:         vmovdqa (%rdx), %xmm1
>    20:         movabsq $140737174016000, %rax
>    30:         vpand   (%rax), %xmm0, %xmm2
>    34:         vpsrld  $8, %xmm2, %xmm3
>    39:         vpor    %xmm2, %xmm3, %xmm2
>    43:         vpcmpeqd        %xmm3, %xmm3, %xmm3
>    47:         vpxor   %xmm3, %xmm0, %xmm3
>    51:         movabsq $140737174016032, %rax
>    61:         vpbroadcastd    (%rax), %xmm4
>    66:         vmovdqa %xmm4, -48(%rbp)
>    71:         vpblendvb       %xmm4, (%rsi), %xmm3, %xmm3
>    77:         vpsrld  $16, %xmm2, %xmm4
>    82:         vpor    %xmm2, %xmm4, %xmm2
>    86:         vpmovzxbw       %xmm0, %xmm4
>    91:         vpxor   %xmm5, %xmm5, %xmm5
>    95:         vpunpckhbw      %xmm5, %xmm0, %xmm0
>    99:         vpmovzxbw       %xmm2, %xmm6
>   104:         vpmullw %xmm4, %xmm6, %xmm4
>   108:         vpunpckhbw      %xmm5, %xmm2, %xmm2
>   112:         vpmullw %xmm0, %xmm2, %xmm0
>   116:         vpsrlw  $8, %xmm4, %xmm2
>   121:         movabsq $140737174016016, %rax
>   131:         vmovdqa (%rax), %xmm6
>   135:         vpaddw  %xmm6, %xmm4, %xmm4
>   139:         vpaddw  %xmm4, %xmm2, %xmm2
>   143:         vpsrlw  $8, %xmm2, %xmm2
>   148:         vpsrlw  $8, %xmm0, %xmm4
>   153:         vpaddw  %xmm6, %xmm0, %xmm0
>   157:         vpaddw  %xmm0, %xmm4, %xmm0
>   161:         vpsrlw  $8, %xmm0, %xmm0
>   166:         vpackuswb       %xmm0, %xmm2, %xmm0
>   170:         vpmovzxbw       %xmm1, %xmm2
>   175:         vpunpckhbw      %xmm5, %xmm1, %xmm1
>   179:         vpmovzxbw       %xmm3, %xmm4
>   184:         vpmullw %xmm4, %xmm2, %xmm2
>   188:         vpunpckhbw      %xmm5, %xmm3, %xmm3
>   192:         vpmullw %xmm3, %xmm1, %xmm1
>   196:         vpsrlw  $8, %xmm2, %xmm3
>   201:         vpaddw  %xmm6, %xmm2, %xmm2
>   205:         vpaddw  %xmm2, %xmm3, %xmm2
>   209:         vpsrlw  $8, %xmm2, %xmm2
>   214:         vpsrlw  $8, %xmm1, %xmm3
>   219:         vpaddw  %xmm6, %xmm1, %xmm1
>   223:         vpaddw  %xmm1, %xmm3, %xmm1
>   227:         vpsrlw  $8, %xmm1, %xmm1
>   232:         vpackuswb       %xmm1, %xmm2, %xmm1
>   236:         vpminub %xmm1, %xmm0, %xmm2
>   240:         vmovdqa %xmm2, -32(%rbp)
>   245:         movabsq $0, %rax
>   255:         callq   *%rax
>   257:         vmovdqa -48(%rbp), %xmm1
>   262:         vpblendvb       %xmm1, -32(%rbp), %xmm0, %xmm0
>   269:         vmovdqa %xmm0, (%rbx)
>   273:         addq    $40, %rsp
>   277:         popq    %rbx
>   278:         popq    %rbp
>   279:         retq
>
> After we return from the callq, we seem to have a corrupt stack.
>
>
> Repository:
>   rL LLVM
>
> https://reviews.llvm.org/D44785