[llvm-dev] Where's the optimiser gone? (part 0)

Roman Lebedev via llvm-dev llvm-dev at lists.llvm.org
Wed Nov 28 05:27:26 PST 2018


On Wed, Nov 28, 2018 at 4:24 PM Stefan Kanthak via llvm-dev
<llvm-dev at lists.llvm.org> wrote:
>
> Hi @ll,
I'm not sure this continuous stream of emails is the most productive format.
I would think these should all be filed either as bugs on https://bugs.llvm.org
or as patches on http://reviews.llvm.org.
And in any case, maybe they should be worded slightly differently...

> compiler-rt implements the Windows-specific routines
> compiler-rt/lib/builtins/i386/chkstk.S and
> compiler-rt/lib/builtins/x86_64/chkstk.S
> See <http://msdn.microsoft.com/en-us/library/ms648426.aspx>
>
> Their implementation is, however, LESS THAN optimal: they can
> yield up to (stacksize / pagesize) superfluous page accesses
> (and thus superfluous page faults)!
>
> As implemented, ALL calls of chkstk() touch ALL pages from
> the current "top" of stack to its new "top", which might
> become the new stack "limit": on access of the "guard page"
> Windows handles the stack growth.
> Touching pages that were already touched before, i.e. those above the
> current "limit" of the stack, is NOT necessary!
>
> Properly optimised chkstk() implementations (for ML.EXE
> and ML64.EXE respectively), which touch every page only
> once, are shown below!
>
> regards
> Stefan Kanthak
Roman.

> See <https://godbolt.org/z/1jSn6->
>
> --- sample0.c ---
>
> void foo(int bar) {          /* sample whose only local is one very large array */
>     int array[234567];       /* 234567 ints of automatic storage */
>     array[234566] = bar;     /* store the argument into the last element */
> }
>
>
> _foo: # @foo
>     push  ebp                                 # save the caller's ebp
>     mov   ebp, esp                            # ebp = frame base for the [ebp +/- n] accesses below
>     mov   eax, 938272                         # eax = byte count handed to __chkstk
>     call  __chkstk
>     mov   eax, dword ptr [ebp + 8]            # eax = dword at [ebp + 8]
>     mov   ecx, dword ptr [ebp + 8]            # ecx = the same dword again
>     mov   dword ptr [ebp - 4], ecx            # copy it into the slot at [ebp - 4]
>     mov   dword ptr [ebp - 938272], eax # 4-byte Spill
>     add   esp, 938272                         # esp += the byte count passed to __chkstk above
>     pop   ebp                                 # restore the caller's ebp
>     ret
>
> int main(int argc) {         /* driver: calls foo twice with the same argument */
>     foo (argc);
>     foo (argc);              /* NOTE(review): presumably repeated to show the second stack probe -- confirm */
> }
>
> --- chkstk.asm (for I386) ---
> ; Copyright (C) 2004-2018, Stefan Kanthak <stefan.kanthak at nexgo.de>
>
>     .686
>     .model flat, C
>     .code
>
> ; MSVC internal intrinsic _alloca() alias _chkstk():
> ; argument is passed in eax, result is returned in esp
> ; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
> ; https://msdn.microsoft.com/en-us/library/ms648426.aspx
>
> _alloca_probe proc public  ; VOID *_alloca_probe(DWORD size)
> _chkstk proc public        ; VOID _chkstk(DWORD size)
> ; Both names label the same code. In: eax = byte count. Out: esp = new stack pointer. ecx is preserved; eax and flags are not.
>     push   ecx              ; decrement esp, save ecx
> ; Now [esp] = saved ecx and [esp+4] = return address, so esp+8 is the caller's esp before the call.
>     lea    ecx, [esp+8]     ; ecx = stack pointer of caller
>     sub    ecx, eax         ; ecx = new (unaligned) stack pointer
>
>     ; Check for wraparound, yield 'stack overflow' exception
>     sbb    eax, eax         ; eax = -1 on carry, else 0
>     not    eax              ; eax = 0 if wraparound, else -1
>     and    ecx, eax         ; ecx = 0 if wraparound, else unchanged
> ; With ecx forced to 0 the probe loop below keeps stepping down one page at a time.
>     assume fs:flat          ; NOTE(review): presumably lets MASM accept the fs: reference below -- confirm
>     mov    eax, fs:[8]      ; eax = (current) stack limit
>
>     cmp    eax, ecx         ; unsigned compare: stack limit vs. new stack pointer
>     jna    short DONE       ; stack limit not above new stack pointer?
>
>     ; Probe next stack page, yield 'guard page' exception
> PROBE:
>     sub    eax, 4096        ; eax = next stack page
>     test   eax, [eax]       ; read-only access to [eax]; its flags are overwritten by the cmp below
>
>     cmp    eax, ecx         ; unsigned compare: probed address vs. new stack pointer
>     ja     short PROBE      ; stack limit above new stack pointer?
>
> DONE:
>     mov    eax, ecx         ; eax = new stack pointer
>     pop    ecx              ; restore ecx
>     xchg   eax, esp         ; esp = new stack pointer,
>                             ; eax = old stack pointer
> ; The old stack pointer (eax) addresses the return address: the push/pop of ecx are balanced.
>     push   [eax]            ; copy the return address to the top of the new stack
>     ret                     ; return through that copy, leaving esp = new stack pointer
>
>
> _chkstk endp
> _alloca_probe endp
>
>  end
>
> --- chkstk.asm (for AMD64) ---
> ; Copyright (C) 2004-2018, Stefan Kanthak <stefan.kanthak at nexgo.de>
>
>     .code
>
> ; MSVC internal intrinsic _alloca() alias _chkstk():
> ; argument is passed in rax
> ; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
> ; https://msdn.microsoft.com/en-us/library/ms648426.aspx
> ; https://msdn.microsoft.com/en-us/library/tawsa7cb.aspx
>
> __chkstk proc public         ; VOID __chkstk(QWORD size)
> ; In: rax = byte count. Writes only r10, r11 and flags; rsp is not lowered here -- presumably the caller subtracts rax itself (confirm).
>     xor    r10, r10          ; r10 = 0
>     lea    r11, [rsp+8]      ; r11 = stack pointer of caller
>     sub    r11, rax          ; r11 = new stack pointer
>     cmovb  r11, r10          ; r11 = r10 = 0 if wraparound, else unchanged
>
> ;;  and    r11, -16          ; r11 = new (aligned) stack pointer
> ; r10 is still 0 here (cmovb above writes only r11), so the load below reads gs:[16].
>     mov    r10, gs:[r10+16]  ; r10 = (current) stack limit
>     cmp    r10, r11          ; unsigned compare: stack limit vs. new stack pointer
>     jna    RETURN            ; stack limit not above new stack pointer?
>
>     ; Probe next stack page, yield 'guard page' exception
> PROBE:
>     sub    r10, 4096         ; r10 = next stack page
>     test   r10, [r10]        ; read-only access to [r10]; its flags are overwritten by the cmp below
>
>     cmp    r10, r11          ; unsigned compare: probed address vs. new stack pointer
>     ja     PROBE             ; stack limit above new stack pointer?
>
> RETURN:
>     ret                      ; nothing left to probe
>
> __chkstk endp
>
>     end
> --- EOF ---
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev


More information about the llvm-dev mailing list