[PATCH] D44785: Lowering x86 adds/addus/subs/subus intrinsics (llvm part)
Tom Hudson via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 20 08:54:17 PDT 2018
tomhudson added a comment.
The simplest example I have of what's breaking is the llvmpipe test code: src/gallium/drivers/llvmpipe/lp_test_blend.c from https://cgit.freedesktop.org/mesa/mesa/. Both 17.0.3 and 18.0.0 break.
llvmpipe was explicitly issuing sse2.psubs and sse2.padds in src/gallium/auxiliary/gallivm/lp_bld_arit.c; that produced working code before this patch, but after this patch it immediately crashes. When we remove the code that issues the sse2 padds/psubs, our test case works again.
Debug output looks like this:
llc -mattr option(s): +sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,-prfchw,+bmi2,-cldemote,+fsgsbase,-xsavec,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3
llc -mcpu option: haswell
test:
0: pushq %rbp
1: movq %rsp, %rbp
4: pushq %rbx
5: subq $40, %rsp
9: movq %r8, %rbx
12: vmovdqa (%rdi), %xmm0
16: vmovdqa (%rdx), %xmm1
20: movabsq $140737174016000, %rax
30: vpand (%rax), %xmm0, %xmm2
34: vpsrld $8, %xmm2, %xmm3
39: vpor %xmm2, %xmm3, %xmm2
43: vpcmpeqd %xmm3, %xmm3, %xmm3
47: vpxor %xmm3, %xmm0, %xmm3
51: movabsq $140737174016032, %rax
61: vpbroadcastd (%rax), %xmm4
66: vmovdqa %xmm4, -48(%rbp)
71: vpblendvb %xmm4, (%rsi), %xmm3, %xmm3
77: vpsrld $16, %xmm2, %xmm4
82: vpor %xmm2, %xmm4, %xmm2
86: vpmovzxbw %xmm0, %xmm4
91: vpxor %xmm5, %xmm5, %xmm5
95: vpunpckhbw %xmm5, %xmm0, %xmm0
99: vpmovzxbw %xmm2, %xmm6
104: vpmullw %xmm4, %xmm6, %xmm4
108: vpunpckhbw %xmm5, %xmm2, %xmm2
112: vpmullw %xmm0, %xmm2, %xmm0
116: vpsrlw $8, %xmm4, %xmm2
121: movabsq $140737174016016, %rax
131: vmovdqa (%rax), %xmm6
135: vpaddw %xmm6, %xmm4, %xmm4
139: vpaddw %xmm4, %xmm2, %xmm2
143: vpsrlw $8, %xmm2, %xmm2
148: vpsrlw $8, %xmm0, %xmm4
153: vpaddw %xmm6, %xmm0, %xmm0
157: vpaddw %xmm0, %xmm4, %xmm0
161: vpsrlw $8, %xmm0, %xmm0
166: vpackuswb %xmm0, %xmm2, %xmm0
170: vpmovzxbw %xmm1, %xmm2
175: vpunpckhbw %xmm5, %xmm1, %xmm1
179: vpmovzxbw %xmm3, %xmm4
184: vpmullw %xmm4, %xmm2, %xmm2
188: vpunpckhbw %xmm5, %xmm3, %xmm3
192: vpmullw %xmm3, %xmm1, %xmm1
196: vpsrlw $8, %xmm2, %xmm3
201: vpaddw %xmm6, %xmm2, %xmm2
205: vpaddw %xmm2, %xmm3, %xmm2
209: vpsrlw $8, %xmm2, %xmm2
214: vpsrlw $8, %xmm1, %xmm3
219: vpaddw %xmm6, %xmm1, %xmm1
223: vpaddw %xmm1, %xmm3, %xmm1
227: vpsrlw $8, %xmm1, %xmm1
232: vpackuswb %xmm1, %xmm2, %xmm1
236: vpminub %xmm1, %xmm0, %xmm2
240: vmovdqa %xmm2, -32(%rbp)
245: movabsq $0, %rax
255: callq *%rax
257: vmovdqa -48(%rbp), %xmm1
262: vpblendvb %xmm1, -32(%rbp), %xmm0, %xmm0
269: vmovdqa %xmm0, (%rbx)
273: addq $40, %rsp
277: popq %rbx
278: popq %rbp
279: retq
After we return from the callq, we seem to have a corrupt stack.
Repository:
rL LLVM
https://reviews.llvm.org/D44785
More information about the llvm-commits
mailing list