[llvm-dev] Where's the optimiser gone? (part 0)

Stefan Kanthak via llvm-dev llvm-dev at lists.llvm.org
Wed Nov 28 05:22:45 PST 2018

Hi @ll,

compiler-rt implements the Windows-specific routines in
compiler-rt/lib/builtins/i386/chkstk.S and
compiler-rt/lib/builtins/x86_64/chkstk.S.
See <http://msdn.microsoft.com/en-us/library/ms648426.aspx>

Their implementation is, however, LESS THAN optimal: they can
yield up to (stacksize / pagesize) superfluous page accesses
(and thus superfluous page faults)!

As implemented, ALL calls of chkstk() touch ALL pages from
the current "top" of stack to its new "top", which might
become the new stack "limit": on access of the "guard page"
Windows handles the stack growth.
Touching pages that have already been touched before, i.e. pages
above the current "limit" of the stack, is NOT necessary!

Properly optimised chkstk() implementations (for ML.EXE
and ML64.EXE respectively), which touch every page only
once, are shown below!

Stefan Kanthak

See <https://godbolt.org/z/1jSn6->

--- sample0.c ---

/* Sample function whose frame is far larger than one page: the
   234567-element int array forces the compiler to emit a stack-probe
   call (__chkstk) in the prologue. */
void foo(int bar) {
    int array[234567];
    array[234566] = bar;    /* store into the last element of the array */
}

# Compiler output for foo() on i386 (Intel syntax).
# The prologue passes the frame size in eax to __chkstk; the epilogue
# releases the same 938272 bytes with a plain add.
_foo: # @foo
    push  ebp
    mov   ebp, esp
    mov   eax, 938272                        # frame size = argument of __chkstk
    call  __chkstk
    mov   eax, dword ptr [ebp + 8]           # eax = bar
    mov   ecx, dword ptr [ebp + 8]           # ecx = bar
    mov   dword ptr [ebp - 4], ecx
    mov   dword ptr [ebp - 938272], eax # 4-byte Spill
    add   esp, 938272                        # release the frame
    pop   ebp
    ret                                      # return to caller (absent from the archived listing)

/* Calls foo() twice: by the second call every stack page foo() needs
   has already been touched by the first one. */
int main(int argc) {
    foo (argc);
    foo (argc);
}

--- chkstk.asm (for I386) ---
; Copyright (C) 2004-2018, Stefan Kanthak <stefan.kanthak at nexgo.de>

    .model flat, C

; MSVC internal intrinsic _alloca() alias _chkstk():
; argument is passed in eax, result is returned in esp
; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
; https://msdn.microsoft.com/en-us/library/ms648426.aspx

_alloca_probe proc public  ; VOID *_alloca_probe(DWORD size)
_chkstk proc public        ; VOID _chkstk(DWORD size)

; Allocates 'size' bytes on the caller's stack and probes only those
; pages that lie below the current stack limit, so pages which were
; touched before are not touched again.
;
; In:    eax = number of bytes to allocate
; Out:   esp = stack pointer of caller minus eax
;        eax = old stack pointer (address of the old return address slot)
; Saved: ecx (pushed on entry, popped before esp is switched)
; Flags are modified.

    push   ecx              ; decrement esp, save ecx
                            ; now [esp] = saved ecx, [esp+4] = return address

    lea    ecx, [esp+8]     ; ecx = stack pointer of caller
    sub    ecx, eax         ; ecx = new (unaligned) stack pointer,
                            ; carry set if the subtraction wrapped around

    ; Check for wraparound, yield 'stack overflow' exception:
    ; the new stack pointer is forced to 0, so the probe loop below
    ; keeps walking down page by page
    sbb    eax, eax         ; eax = -1 on carry, else 0
    not    eax              ; eax = 0 if wraparound, else -1
    and    ecx, eax         ; ecx = 0 if wraparound, else unchanged

    assume fs:flat
    mov    eax, fs:[8]      ; eax = (current) stack limit

    cmp    eax, ecx
    jna    short DONE       ; stack limit not above new stack pointer?
                            ; then no page needs to be probed

PROBE:
    ; Probe next stack page, yield 'guard page' exception
    sub    eax, 4096        ; eax = next stack page
    test   eax, [eax]       ; read access only, result is discarded

    cmp    eax, ecx
    ja     short PROBE      ; stack limit above new stack pointer?

DONE:
    mov    eax, ecx         ; eax = new stack pointer
    pop    ecx              ; restore ecx; esp = address of return address
    xchg   eax, esp         ; esp = new stack pointer,
                            ; eax = old stack pointer

    push   dword ptr [eax]  ; copy return address onto the new stack
    ret                     ; pops the copy, leaving esp = new stack pointer

_chkstk endp
_alloca_probe endp


--- chkstk.asm (for AMD64) ---
; Copyright (C) 2004-2018, Stefan Kanthak <stefan.kanthak at nexgo.de>


; MSVC internal intrinsic _alloca() alias _chkstk():
; argument is passed in rax
; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
; https://msdn.microsoft.com/en-us/library/ms648426.aspx
; https://msdn.microsoft.com/en-us/library/tawsa7cb.aspx

__chkstk proc public         ; VOID __chkstk(QWORD size)

; Probes every stack page between the current stack limit and the
; stack pointer the caller would have after allocating 'size' bytes.
; This routine only probes: it does not change rsp (apart from the
; final ret) and it does not write rax.
;
; In:       rax = number of bytes the caller is about to allocate
; Clobbers: r10, r11, flags

    xor    r10, r10          ; r10 = 0
    lea    r11, [rsp+8]      ; r11 = stack pointer of caller
    sub    r11, rax          ; r11 = new stack pointer
    cmovb  r11, r10          ; r11 = r10 = 0 if wraparound, else unchanged

;;  and    r11, -16          ; r11 = new (aligned) stack pointer

    mov    r10, gs:[r10+16]  ; r10 = (current) stack limit
                             ; (r10 is still 0 here, so this reads gs:[16])
    cmp    r10, r11
    jna    RETURN            ; stack limit not above new stack pointer?
                             ; then no page needs to be probed

PROBE:
    ; Probe next stack page, yield 'guard page' exception
    sub    r10, 4096         ; r10 = next stack page
    test   r10, [r10]        ; read access only, result is discarded

    cmp    r10, r11
    ja     PROBE             ; stack limit above new stack pointer?

RETURN:
    ret

__chkstk endp

--- EOF ---

More information about the llvm-dev mailing list