[PATCH] D44785: Lowering x86 adds/addus/subs/subus intrinsics (llvm part)

Tom Hudson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 20 08:54:17 PDT 2018


tomhudson added a comment.

The simplest example I have of what's breaking is the llvmpipe test code: src/gallium/drivers/llvmpipe/lp_test_blend.c from https://cgit.freedesktop.org/mesa/mesa/. Both Mesa 17.0.3 and Mesa 18.0.0 break.

llvmpipe was explicitly emitting the sse2.psubs and sse2.padds intrinsics in src/gallium/auxiliary/gallivm/lp_bld_arit.c. That produced working code before this patch, but after this patch the generated code crashes immediately. If we stop emitting the sse2 padds/psubs intrinsics, our test case works again.

Debug output looks like this:

llc -mattr option(s): +sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,-prfchw,+bmi2,-cldemote,+fsgsbase,-xsavec,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3
llc -mcpu option: haswell

test:

    0:         pushq   %rbp
    1:         movq    %rsp, %rbp
    4:         pushq   %rbx
    5:         subq    $40, %rsp
    9:         movq    %r8, %rbx
   12:         vmovdqa (%rdi), %xmm0
   16:         vmovdqa (%rdx), %xmm1
   20:         movabsq $140737174016000, %rax
   30:         vpand   (%rax), %xmm0, %xmm2
   34:         vpsrld  $8, %xmm2, %xmm3
   39:         vpor    %xmm2, %xmm3, %xmm2
   43:         vpcmpeqd        %xmm3, %xmm3, %xmm3
   47:         vpxor   %xmm3, %xmm0, %xmm3
   51:         movabsq $140737174016032, %rax
   61:         vpbroadcastd    (%rax), %xmm4
   66:         vmovdqa %xmm4, -48(%rbp)
   71:         vpblendvb       %xmm4, (%rsi), %xmm3, %xmm3
   77:         vpsrld  $16, %xmm2, %xmm4
   82:         vpor    %xmm2, %xmm4, %xmm2
   86:         vpmovzxbw       %xmm0, %xmm4
   91:         vpxor   %xmm5, %xmm5, %xmm5
   95:         vpunpckhbw      %xmm5, %xmm0, %xmm0
   99:         vpmovzxbw       %xmm2, %xmm6
  104:         vpmullw %xmm4, %xmm6, %xmm4
  108:         vpunpckhbw      %xmm5, %xmm2, %xmm2
  112:         vpmullw %xmm0, %xmm2, %xmm0
  116:         vpsrlw  $8, %xmm4, %xmm2
  121:         movabsq $140737174016016, %rax
  131:         vmovdqa (%rax), %xmm6
  135:         vpaddw  %xmm6, %xmm4, %xmm4
  139:         vpaddw  %xmm4, %xmm2, %xmm2
  143:         vpsrlw  $8, %xmm2, %xmm2
  148:         vpsrlw  $8, %xmm0, %xmm4
  153:         vpaddw  %xmm6, %xmm0, %xmm0
  157:         vpaddw  %xmm0, %xmm4, %xmm0
  161:         vpsrlw  $8, %xmm0, %xmm0
  166:         vpackuswb       %xmm0, %xmm2, %xmm0
  170:         vpmovzxbw       %xmm1, %xmm2
  175:         vpunpckhbw      %xmm5, %xmm1, %xmm1
  179:         vpmovzxbw       %xmm3, %xmm4
  184:         vpmullw %xmm4, %xmm2, %xmm2
  188:         vpunpckhbw      %xmm5, %xmm3, %xmm3
  192:         vpmullw %xmm3, %xmm1, %xmm1
  196:         vpsrlw  $8, %xmm2, %xmm3
  201:         vpaddw  %xmm6, %xmm2, %xmm2
  205:         vpaddw  %xmm2, %xmm3, %xmm2
  209:         vpsrlw  $8, %xmm2, %xmm2
  214:         vpsrlw  $8, %xmm1, %xmm3
  219:         vpaddw  %xmm6, %xmm1, %xmm1
  223:         vpaddw  %xmm1, %xmm3, %xmm1
  227:         vpsrlw  $8, %xmm1, %xmm1
  232:         vpackuswb       %xmm1, %xmm2, %xmm1
  236:         vpminub %xmm1, %xmm0, %xmm2
  240:         vmovdqa %xmm2, -32(%rbp)
  245:         movabsq $0, %rax
  255:         callq   *%rax
  257:         vmovdqa -48(%rbp), %xmm1
  262:         vpblendvb       %xmm1, -32(%rbp), %xmm0, %xmm0
  269:         vmovdqa %xmm0, (%rbx)
  273:         addq    $40, %rsp
  277:         popq    %rbx
  278:         popq    %rbp
  279:         retq

After we return from the callq (offset 255 in the listing above), we seem to have a corrupt stack.


Repository:
  rL LLVM

https://reviews.llvm.org/D44785





More information about the llvm-commits mailing list