<div dir="ltr">This is not a fix for the actual bug (which, as the thread you linked discusses, is LLVM's assumption that the __chkstk function lies within 2GB of the emitted code in the address space), but there is a simple workaround: hoist all allocas into the first (entry) basic block of your function. This allows the JIT to perform all stack allocations in a single adjustment of the SP instead of needing to use dynamic stack allocation, and thereby avoids the call to __chkstk entirely.</div>

<div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Aug 19, 2013 at 12:21 PM, Kévin Szkudlapski <span dir="ltr">&lt;<a href="mailto:szkudl.k@gmail.com" target="_blank">szkudl.k@gmail.com</a>&gt;</span> wrote:<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hi,<br>

<br>

I'm using LLVM to convert expressions to native assembly. The problem<br>

occurs when LLVM compiles this code:<br>

<br>

define void @fn_0000000000000000(i8*, i8*, i8*) {<br>

bb:<br>

  %res = alloca i32<br>

  %3 = load i32* %res<br>

  %4 = bitcast i8* %0 to i32*<br>

  %5 = load i32* %4<br>

  %6 = bitcast i8* %0 to i32*<br>

  %7 = load i32* %6<br>

  %8 = xor i32 %5, %7<br>

  store volatile i32 %8, i32* %res<br>

  %9 = load i32* %res<br>

  %10 = icmp eq i32 %9, 0<br>

  br i1 %10, label %then, label %else<br>

<br>

merged:                                           ; preds = %else, %then<br>

  %11 = load i32* %res<br>

  %12 = and i32 %11, -2147483648<br>

  %13 = icmp eq i32 %12, 0<br>

  br i1 %13, label %then3, label %else4<br>

<br>

then:                                             ; preds = %bb<br>

  %zf = alloca i1<br>

  %14 = load i1* %zf<br>

  %15 = getelementptr i8* %0, i32 148<br>

  %16 = bitcast i8* %15 to i1*<br>

  %17 = load i1* %16<br>

  store volatile i1 true, i1* %16<br>

  br label %merged<br>

<br>

else:                                             ; preds = %bb<br>

  %zf1 = alloca i1<br>

  %18 = load i1* %zf1<br>

  %19 = getelementptr i8* %0, i32 148<br>

  %20 = bitcast i8* %19 to i1*<br>

  %21 = load i1* %20<br>

  store volatile i1 false, i1* %20<br>

  br label %merged<br>

<br>

merged2:                                          ; preds = %else4, %then3<br>

  %22 = bitcast i8* %0 to i32*<br>

  %23 = load i32* %22<br>

  %24 = load i32* %res<br>

  store volatile i32 %24, i32* %22<br>

  %af = alloca i1<br>

  %25 = load i1* %af<br>

  %26 = getelementptr i8* %0, i32 148<br>

  %27 = bitcast i8* %26 to i1*<br>

  %28 = load i1* %27<br>

  store volatile i1 false, i1* %27<br>

  %of = alloca i1<br>

  %29 = load i1* %of<br>

  %30 = getelementptr i8* %0, i32 148<br>

  %31 = bitcast i8* %30 to i1*<br>

  %32 = load i1* %31<br>

  store volatile i1 false, i1* %31<br>

  %cf = alloca i1<br>

  %33 = load i1* %cf<br>

  %34 = getelementptr i8* %0, i32 148<br>

  %35 = bitcast i8* %34 to i1*<br>

  %36 = load i1* %35<br>

  store volatile i1 false, i1* %35<br>

  %37 = getelementptr i8* %0, i32 64<br>

  %38 = bitcast i8* %37 to i32*<br>

  %39 = load i32* %38<br>

  %40 = getelementptr i8* %0, i32 64<br>

  %41 = bitcast i8* %40 to i32*<br>

  %42 = load i32* %41<br>

  %43 = add i32 %42, 2<br>

  store volatile i32 %43, i32* %38<br>

  ret void<br>

<br>

then3:                                            ; preds = %merged<br>

  %sf = alloca i1<br>

  %44 = load i1* %sf<br>

  %45 = getelementptr i8* %0, i32 148<br>

  %46 = bitcast i8* %45 to i1*<br>

  %47 = load i1* %46<br>

  store volatile i1 false, i1* %46<br>

  br label %merged2<br>

<br>

else4:                                            ; preds = %merged<br>

  %sf5 = alloca i1<br>

  %48 = load i1* %sf5<br>

  %49 = getelementptr i8* %0, i32 148<br>

  %50 = bitcast i8* %49 to i1*<br>

  %51 = load i1* %50<br>

  store volatile i1 true, i1* %50<br>

  br label %merged2<br>

}<br>

<br>

<br>

It generates the following assembly:<br>

0000000581D30010  push        rbp<br>

0000000581D30011  mov         rbp,rsp<br>

0000000581D30014  sub         rsp,10h<br>

0000000581D30018  mov         dword ptr [rbp-4],0<br>

0000000581D3001F  mov         al,1<br>

0000000581D30021  test        al,al<br>

0000000581D30023  jne         0000000581D30042<br>

0000000581D30029  mov         eax,10h<br>

0000000581D3002E  call        00000005F08425D0<br>

0000000581D30033  sub         rsp,rax<br>

0000000581D30036  mov         byte ptr [rcx+94h],0<br>

0000000581D3003D  jmp         0000000581D30056<br>

0000000581D30042  mov         eax,10h<br>

0000000581D30047  call        00000005F08425D0<br>

0000000581D3004C  sub         rsp,rax<br>

0000000581D3004F  mov         byte ptr [rcx+94h],1<br>

0000000581D30056  test        byte ptr [rbp-1],80h<br>

0000000581D3005A  je          0000000581D30079<br>

0000000581D30060  mov         eax,10h<br>

0000000581D30065  call        00000005F08425D0<br>

0000000581D3006A  sub         rsp,rax<br>

0000000581D3006D  mov         byte ptr [rcx+94h],1<br>

0000000581D30074  jmp         0000000581D3008D<br>

0000000581D30079  mov         eax,10h<br>

0000000581D3007E  call        00000005F08425D0<br>

0000000581D30083  sub         rsp,rax<br>

0000000581D30086  mov         byte ptr [rcx+94h],0<br>

0000000581D3008D  mov         eax,dword ptr [rbp-4]<br>

0000000581D30090  mov         dword ptr [rcx],eax<br>

0000000581D30092  mov         eax,10h<br>

0000000581D30097  call        00000005F08425D0<br>

0000000581D3009C  sub         rsp,rax<br>

0000000581D3009F  mov         byte ptr [rcx+94h],0<br>

0000000581D300A6  mov         eax,10h<br>

0000000581D300AB  call        00000005F08425D0<br>

0000000581D300B0  sub         rsp,rax<br>

0000000581D300B3  mov         byte ptr [rcx+94h],0<br>

0000000581D300BA  mov         eax,10h<br>

0000000581D300BF  call        00000005F08425D0<br>

0000000581D300C4  sub         rsp,rax<br>

0000000581D300C7  mov         byte ptr [rcx+94h],0<br>

0000000581D300CE  add         dword ptr [rcx+40h],2<br>

0000000581D300D2  mov         rsp,rbp<br>

0000000581D300D5  pop         rbp<br>

0000000581D300D6  ret<br>

<br>

The call target at 0x00000005F08425D0 is not valid code (according to<br>

Visual Studio: 00000005F08425D0  ?? ??).<br>

<br>

If I compile the same LLVM bitcode using llc instead, the called function is __chkstk:<br>

        .def     fn_0000000000000000;<br>

        .scl    2;<br>

        .type   32;<br>

        .endef<br>

        .text<br>

        .globl  fn_0000000000000000<br>

        .align  16, 0x90<br>

fn_0000000000000000:                    # @fn_0000000000000000<br>

# BB#0:                                 # %bb<br>

        push    rbp<br>

        mov     rbp, rsp<br>

        sub     rsp, 16<br>

        mov     dword ptr [rbp - 4], 0<br>

        mov     al, 1<br>

        test    al, al<br>

        jne     .LBB0_1<br>

# BB#2:                                 # %else<br>

        mov     eax, 16<br>

        call    __chkstk<br>

        sub     rsp, rax<br>

        mov     byte ptr [rcx + 148], 0<br>

        jmp     .LBB0_3<br>

.LBB0_1:                                # %then<br>

        mov     eax, 16<br>

        call    __chkstk<br>

        sub     rsp, rax<br>

        mov     byte ptr [rcx + 148], 1<br>

.LBB0_3:                                # %merged<br>

        test    byte ptr [rbp - 1], -128<br>

        je      .LBB0_4<br>

# BB#5:                                 # %else4<br>

        mov     eax, 16<br>

        call    __chkstk<br>

        sub     rsp, rax<br>

        mov     byte ptr [rcx + 148], 1<br>

        jmp     .LBB0_6<br>

.LBB0_4:                                # %then3<br>

        mov     eax, 16<br>

        call    __chkstk<br>

        sub     rsp, rax<br>

        mov     byte ptr [rcx + 148], 0<br>

.LBB0_6:                                # %merged2<br>

        mov     eax, dword ptr [rbp - 4]<br>

        mov     dword ptr [rcx], eax<br>

        mov     eax, 16<br>

        call    __chkstk<br>

        sub     rsp, rax<br>

        mov     byte ptr [rcx + 148], 0<br>

        mov     eax, 16<br>

        call    __chkstk<br>

        sub     rsp, rax<br>

        mov     byte ptr [rcx + 148], 0<br>

        mov     eax, 16<br>

        call    __chkstk<br>

        sub     rsp, rax<br>

        mov     byte ptr [rcx + 148], 0<br>

        add     dword ptr [rcx + 64], 2<br>

        mov     rsp, rbp<br>

        pop     rbp<br>

        ret<br>

<br>

It seems this issue has already been described here:<br>

<a href="https://groups.google.com/forum/#!topic/llvm-commit/htNjwbWsNe8" target="_blank">https://groups.google.com/forum/#!topic/llvm-commit/htNjwbWsNe8</a><br>

<br>

I'm using this code<br>

<a href="https://github.com/wisk/medusa/blob/master/src/emul/llvm/llvm_emulator.cpp" target="_blank">https://github.com/wisk/medusa/blob/master/src/emul/llvm/llvm_emulator.cpp</a><br>

which is pretty basic.<br>

<br>

Please tell me if you need further information about this issue.<br>

<span class="HOEnZb"><font color="#888888"><br>

--<br>

Kevin Szkudlapski<br>

_______________________________________________<br>

LLVM Developers mailing list<br>

<a href="mailto:LLVMdev@cs.uiuc.edu">LLVMdev@cs.uiuc.edu</a>         <a href="http://llvm.cs.uiuc.edu" target="_blank">http://llvm.cs.uiuc.edu</a><br>

<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev</a><br>

</font></span></blockquote></div><br></div>