[llvm-dev] Where's the optimiser gone? (part 0)
Stefan Kanthak via llvm-dev
llvm-dev at lists.llvm.org
Wed Nov 28 05:22:45 PST 2018
Hi @ll,
compiler-rt implements the Windows-specific chkstk() routines in
compiler-rt/lib/builtins/i386/chkstk.S and
compiler-rt/lib/builtins/x86_64/chkstk.S
See <http://msdn.microsoft.com/en-us/library/ms648426.aspx>
Their implementation is LESS THAN optimal: they can
yield up to (stacksize / pagesize) superfluous page accesses
(and thus superfluous page faults)!
As implemented, EVERY call of chkstk() touches ALL pages from
the current "top" of the stack down to its new "top", which might
become the new stack "limit": when the "guard page" is accessed,
Windows handles the stack growth.
Touching pages that were already touched before, i.e. those above
the current "limit" of the stack, is NOT necessary!
Properly optimised chkstk() implementations for I386 and AMD64
(to be assembled with ML.EXE and ML64.EXE respectively), which
touch every page only once, are shown below!
regards
Stefan Kanthak
See <https://godbolt.org/z/1jSn6->
--- sample0.c ---
/* Test case: declares a local array of 234567 ints and stores `bar`
   into its last element (index 234566). Returns nothing. */
void foo(int bar) {
int array[234567]; /* 234567 * sizeof(int) bytes of automatic storage */
array[234566] = bar; /* the only access: a write to the highest index */
}
# Compiler output for foo() as quoted in the post: frame setup, a call to
# __chkstk with a byte count in eax, two stores, then frame teardown.
_foo: # @foo
push ebp # save the caller's frame pointer
mov ebp, esp # ebp = frame pointer of this function
mov eax, 938272 # eax = byte count handed to __chkstk
call __chkstk # no "sub esp" follows; the matching "add esp" is below
mov eax, dword ptr [ebp + 8] # eax = dword at [ebp + 8] (presumably the argument bar)
mov ecx, dword ptr [ebp + 8] # ecx = the same dword, loaded a second time
mov dword ptr [ebp - 4], ecx # store at the top of the local area
mov dword ptr [ebp - 938272], eax # 4-byte Spill
add esp, 938272 # release the 938272 bytes again
pop ebp # restore the caller's frame pointer
ret
/* Driver: calls foo() twice in a row with the same argument.
   NOTE(review): presumably the second call is there to show pages being
   probed again after the first call already touched them -- confirm. */
int main(int argc) {
foo (argc); /* first call */
foo (argc); /* second call, same argument */
}
--- chkstk.asm (for I386) ---
; Copyright (C) 2004-2018, Stefan Kanthak <stefan.kanthak at nexgo.de>
.686
.model flat, C
.code
;-----------------------------------------------------------------------
; _chkstk / _alloca_probe -- stack probe and allocation helper (I386)
;
;   VOID  _chkstk(DWORD size)
;   VOID *_alloca_probe(DWORD size)
;
; MSVC internal intrinsic _alloca() alias _chkstk(); both names share
; one body.
;
; In:    eax = number of bytes to allocate
; Out:   esp = stack pointer of the caller lowered by eax
;        eax = stack pointer on entry (address of the return address)
; Keeps: ecx (saved and restored); the flags are modified
;
; Pages are touched only below the current stack limit read from
; fs:[STACK_LIMIT], one PAGE_SIZE step at a time, going downwards.
; When the subtraction wraps around, the target becomes 0, so the
; loop keeps probing downwards (yield 'stack overflow' exception).
;
; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
; https://msdn.microsoft.com/en-us/library/ms648426.aspx
;-----------------------------------------------------------------------
PAGE_SIZE       equ     4096            ; distance between two probes
STACK_LIMIT     equ     8               ; fs offset of the stack limit

_alloca_probe   proc    public
_chkstk         proc    public
        push    ecx                     ; keep ecx; esp is now 4 lower
        lea     ecx, [esp+8]            ; ecx = esp of caller after return
        sub     ecx, eax                ; ecx = requested (unaligned) esp,
                                        ; carry set if it wrapped around
        sbb     eax, eax                ; eax = -1 if wrapped, else 0
        not     eax                     ; eax = 0 if wrapped, else -1
        and     ecx, eax                ; wrapped: target 0, else unchanged
        assume  fs:flat
        mov     eax, fs:[STACK_LIMIT]   ; eax = current stack limit
        cmp     eax, ecx
        jna     short allocate          ; limit <= target: nothing to probe

; Walk down from the limit; the read yields the 'guard page' exception.
touch_page:
        sub     eax, PAGE_SIZE          ; eax = next lower page
        test    eax, [eax]              ; read access only
        cmp     eax, ecx
        ja      short touch_page        ; still above the target: go on

allocate:
        mov     eax, ecx                ; eax = new stack pointer
        pop     ecx                     ; ecx restored, esp = entry value
        xchg    eax, esp                ; esp = new stack pointer,
                                        ; eax = old stack pointer
        push    dword ptr [eax]         ; copy return address below new esp
        ret                             ; pops it again: esp = new value
_chkstk         endp
_alloca_probe   endp
end
--- chkstk.asm (for AMD64) ---
; Copyright (C) 2004-2018, Stefan Kanthak <stefan.kanthak at nexgo.de>
.code
;-----------------------------------------------------------------------
; __chkstk -- stack probe helper (AMD64)
;
;   VOID __chkstk(QWORD size)
;
; MSVC internal intrinsic _alloca() alias _chkstk().
;
; In:    rax = number of bytes the caller wants below its stack pointer
; Out:   nothing; rsp and rax are not written
; Uses:  r10, r11 and the flags
;
; Pages are touched only below the current stack limit read from
; gs:[STACK_LIMIT], one PAGE_SIZE step at a time, going downwards.
; When the subtraction wraps around, the target becomes 0.
;
; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
; https://msdn.microsoft.com/en-us/library/ms648426.aspx
; https://msdn.microsoft.com/en-us/library/tawsa7cb.aspx
;-----------------------------------------------------------------------
PAGE_SIZE       equ     4096            ; distance between two probes
STACK_LIMIT     equ     16              ; gs offset of the stack limit

__chkstk        proc    public
        xor     r10, r10                ; r10 = 0: wraparound value and
                                        ; base register for the gs load
        lea     r11, [rsp+8]            ; r11 = rsp of caller after return
        sub     r11, rax                ; r11 = requested stack pointer,
                                        ; carry set if it wrapped around
        cmovb   r11, r10                ; wrapped: target 0, else unchanged
;;      and     r11, -16                ; r11 = new (aligned) stack pointer
        mov     r10, gs:[r10+STACK_LIMIT] ; r10 = current stack limit
        cmp     r10, r11
        jna     finished                ; limit <= target: nothing to probe

; Walk down from the limit; the read yields the 'guard page' exception.
touch_page:
        sub     r10, PAGE_SIZE          ; r10 = next lower page
        test    r10, [r10]              ; read access only
        cmp     r10, r11
        ja      touch_page              ; still above the target: go on

finished:
        ret
__chkstk        endp
end
--- EOF ---
More information about the llvm-dev
mailing list