[llvm-dev] Where's the optimiser gone (part 10): sptting a cookie
Stefan Kanthak via llvm-dev
llvm-dev at lists.llvm.org
Mon Jan 14 10:59:15 PST 2019
Compile with -O3 -m32, or generate an assembly listing of __divdi3
and __moddi3 as shipped in clang_rt.builtins-i386.lib
/*
 * Unsigned 64-bit division helper called by __moddi3 and __divdi3 in this
 * post; only the prototype is shown here.
 * Judging from those two callers, the return value is the quotient and the
 * remainder is written through `remainder`. __divdi3 passes 0 for
 * `remainder`, so a null pointer presumably means "remainder not wanted"
 * -- TODO confirm against the actual definition.
 */
unsigned long long __udivmoddi4(unsigned long long numerator,
unsigned long long denominator,
unsigned long long *remainder);
/*
 * Signed 64-bit remainder computed via the unsigned __udivmoddi4 helper.
 *
 * Both operands are replaced by their magnitudes, the unsigned remainder is
 * fetched through the helper's out-pointer, and the sign of the original
 * dividend is then applied to that remainder.
 *
 * The sign masks rely on `>>` of a negative long long being an arithmetic
 * shift (yielding -1), as the per-line comments state.
 * With a mask m of 0 or -1, (x ^ m) - m is x when m == 0 and -x when m == -1.
 */
long long __moddi3(long long dividend, long long divisor)
{
long long r = divisor >> 63; // r = divisor < 0 ? -1 : 0
long long s = dividend >> 63; // s = dividend < 0 ? -1 : 0
divisor = (divisor ^ r) - r; // negate if divisor < 0
dividend = (dividend ^ s) - s; // negate if dividend < 0
// r is reused as the remainder's storage: its address is handed to the
// helper, whose return value is ignored here.
__udivmoddi4(dividend, divisor, (unsigned long long *) &r);
return (r ^ s) - s; // negate if dividend < 0
}
___moddi3:
00: 55 push ebp |
01: 89 E5 mov ebp, esp |
03: 53 push ebx | push ebx
04: 57 push edi |
05: 56 push esi |
06: 83 E4 F8 and esp, 0FFFFFFF8h |
09: 83 EC 10 sub esp, 10h | sub esp, 8
0C: 8B 45 14 mov eax, [ebp+14h] | mov eax, [esp+28]
0F: 8B 55 10 mov edx, [ebp+10h] | mov ecx, [esp+24]
12: 8B 35 00 00 00 00 mov esi, [___security_cookie] |
18: 89 E7 mov edi, esp | push esp
1A: 89 C1 mov ecx, eax |
1C: C1 F9 1F sar ecx, 1Fh | cdq
1F: 01 CA add edx, ecx | xor ecx, edx
21: 11 C8 adc eax, ecx | xor eax, edx
23: 31 CA xor edx, ecx | sub ecx, edx
25: 31 EE xor esi, ebp |
27: 31 C8 xor eax, ecx | sbb eax, edx
| push eax
| push ecx
29: 8B 4D 0C mov ecx, [ebp+0Ch] | mov eax, [esp+32]
2C: 89 74 24 08 mov [esp+8],esi |
30: 8B 75 08 mov esi, [ebp+8] | mov ecx, [esp+28]
33: 89 CB mov ebx, ecx | cdq
35: C1 FB 1F sar ebx, 1Fh | mov ebx, edx
38: 31 DE xor esi, ebx | xor ecx, edx
3A: 31 D9 xor ecx, ebx | xor eax, edx
3C: 29 DE sub esi, ebx | sub ecx, edx
3E: 19 D9 sbb ecx, ebx | sbb eax, edx
40: 57 push edi |
41: 50 push eax | push eax
42: 52 push edx |
43: 51 push ecx | push ecx
44: 56 push esi |
45: E8 00 00 00 00 call ___udivmoddi4 | call ___udivmoddi4
4A: 83 C4 14 add esp, 14h | add esp, 20
4D: 8B 3C 24 mov edi, [esp] |
50: 8B 74 24 04 mov esi, [esp+4] | mov eax, [esp]
54: 8B 4C 24 08 mov ecx, [esp+8] | mov edx, [esp+4]
58: 31 DF xor edi, ebx | xor eax, ebx
5A: 31 DE xor esi, ebx | xor edx, ebx
5C: 29 DF sub edi, ebx | sub eax, ebx
5E: 19 DE sbb esi, ebx | sbb edx, ebx
60: 31 E9 xor ecx, ebp |
62: E8 00 00 00 00 call @__security_check_cookie@4 |
67: 89 F8 mov eax, edi |
69: 89 F2 mov edx, esi |
6B: 8D 65 F4 lea esp, [ebp-0Ch] | add esp, 8
6E: 5E pop esi |
6F: 5F pop edi |
70: 5B pop ebx | pop ebx
71: 5D pop ebp |
72: C3 ret | ret
clang generates 51 instructions, 18 more than properly optimised code,
tinkers with a stack cookie, although there is no array allocated on
the stack, and clobbers registers EDI and ESI without necessity.
/*
 * Signed 64-bit quotient computed via the unsigned __udivmoddi4 helper.
 *
 * Both operands are replaced by their magnitudes, the helper's unsigned
 * return value is taken as the quotient, and it is negated when the two
 * operand signs differ. The helper's remainder argument is passed as 0.
 *
 * The sign masks rely on `>>` of a negative long long being an arithmetic
 * shift (yielding -1), as the per-line comments state.
 * With a mask m of 0 or -1, (x ^ m) - m is x when m == 0 and -x when m == -1.
 */
long long __divdi3(long long dividend, long long divisor)
{
long long r = divisor >> 63; // r = divisor < 0 ? -1 : 0
long long s = dividend >> 63; // s = dividend < 0 ? -1 : 0
divisor = (divisor ^ r) - r; // negate if divisor < 0
dividend = (dividend ^ s) - s; // negate if dividend < 0
s ^= r; // sign of quotient: -1 iff exactly one operand was negative
// negate if quotient < 0
return (__udivmoddi4(dividend, divisor, 0) ^ s) - s;
}
__divdi3: # @__divdi3
push ebx | push ebx
push edi |
push esi |
mov ecx, dword ptr [esp + 28] | mov eax, [esp+20]
mov eax, dword ptr [esp + 20] |
mov edi, dword ptr [esp + 24] | mov ecx, [esp+16]
mov ebx, dword ptr [esp + 16] |
mov edx, ecx |
mov esi, eax |
sar edx, 31 | cdq
sar esi, 31 | mov ebx, edx
xor edi, edx | xor ecx, edx
xor ecx, edx | xor eax, edx
sub edi, edx | sub ecx, edx
sbb ecx, edx | sbb eax, edx
xor ebx, esi |
xor eax, esi |
sub ebx, esi |
sbb eax, esi |
xor esi, edx |
sub esp, 12 # WTF? |
push 0 | push 0
push ecx | push eax
push edi | push ecx
| mov eax, [esp+24]
| mov ecx, [esp+20]
| cdq
| xor ecx, edx
| xor eax, edx
| sub ecx, edx
| sbb eax, edx
| xor ebx, edx
push eax | push eax
push ebx | push ecx
call __udivmoddi4 | call __udivmoddi4
add esp, 32 | add esp, 20
xor eax, esi | xor eax, ebx
xor edx, esi | xor edx, ebx
sub eax, esi | sub eax, ebx
sbb edx, esi | sbb edx, ebx
pop esi |
pop edi |
pop ebx | pop ebx
ret | ret
clang generates 36 instructions, 6 more than properly optimised code,
tinkers with ESP and clobbers registers EDI and ESI without necessity.
stay tuned
Stefan Kanthak
More information about the llvm-dev
mailing list