[llvm-dev] Where's the optimiser gone (part 10): sptting a cookie

Stefan Kanthak via llvm-dev llvm-dev at lists.llvm.org
Mon Jan 14 10:59:15 PST 2019

Compile with -O3 -m32, or generate an assembly listing of __divdi3
and __moddi3 as shipped in clang_rt.builtins-i386.lib

unsigned long long __udivmoddi4(unsigned long long numerator,
                                unsigned long long denominator,
                                unsigned long long *remainder);

long long __moddi3(long long dividend, long long divisor)
{
    long long r = divisor >> 63;    // r = divisor < 0 ? -1 : 0
    long long s = dividend >> 63;   // s = dividend < 0 ? -1 : 0
    divisor = (divisor ^ r) - r;    // negate if divisor < 0
    dividend = (dividend ^ s) - s;  // negate if dividend < 0
    __udivmoddi4(dividend, divisor, (unsigned long long *) &r);
    return (r ^ s) - s;             // negate if dividend < 0
}

00: 55                 push    ebp                       |
01: 89 E5              mov     ebp, esp                  |
03: 53                 push    ebx                       |    push    ebx
04: 57                 push    edi                       |
05: 56                 push    esi                       |
06: 83 E4 F8           and     esp, 0FFFFFFF8h           |
09: 83 EC 10           sub     esp, 10h                  |    sub     esp, 8     
0C: 8B 45 14           mov     eax, [ebp+14h]            |    mov     eax, [esp+28]
0F: 8B 55 10           mov     edx, [ebp+10h]            |    mov     ecx, [esp+24]
12: 8B 35 00 00 00 00  mov     esi, [___security_cookie] |
18: 89 E7              mov     edi, esp                  |    push    esp
1A: 89 C1              mov     ecx, eax                  |
1C: C1 F9 1F           sar     ecx, 1Fh                  |    cdq
1F: 01 CA              add     edx, ecx                  |    xor     ecx, edx
21: 11 C8              adc     eax, ecx                  |    xor     eax, edx
23: 31 CA              xor     edx, ecx                  |    sub     ecx, edx
25: 31 EE              xor     esi, ebp                  |
27: 31 C8              xor     eax, ecx                  |    sbb     eax, edx
                                                         |    push    eax
                                                         |    push    ecx
29: 8B 4D 0C           mov     ecx, [ebp+0Ch]            |    mov     eax, [esp+32]
2C: 89 74 24 08        mov     [esp+8],esi               |
30: 8B 75 08           mov     esi, [ebp+8]              |    mov     ecx, [esp+28]
33: 89 CB              mov     ebx, ecx                  |    cdq
35: C1 FB 1F           sar     ebx, 1Fh                  |    mov     ebx, edx
38: 31 DE              xor     esi, ebx                  |    xor     ecx, edx
3A: 31 D9              xor     ecx, ebx                  |    xor     eax, edx
3C: 29 DE              sub     esi, ebx                  |    sub     ecx, edx
3E: 19 D9              sbb     ecx, ebx                  |    sbb     eax, edx
40: 57                 push    edi                       |
41: 50                 push    eax                       |    push    eax
42: 52                 push    edx                       |
43: 51                 push    ecx                       |    push    ecx
44: 56                 push    esi                       |
45: E8 00 00 00 00     call    ___udivmoddi4             |    call    ___udivmoddi4
4A: 83 C4 14           add     esp, 14h                  |    add     esp, 20
4D: 8B 3C 24           mov     edi, [esp]                |
50: 8B 74 24 04        mov     esi, [esp+4]              |    mov     eax, [esp]
54: 8B 4C 24 08        mov     ecx, [esp+8]              |    mov     edx, [esp+4]
58: 31 DF              xor     edi, ebx                  |    xor     eax, ebx
5A: 31 DE              xor     esi, ebx                  |    xor     edx, ebx
5C: 29 DF              sub     edi, ebx                  |    sub     eax, ebx
5E: 19 DE              sbb     esi, ebx                  |    sbb     edx, ebx
60: 31 E9              xor     ecx, ebp                  |
62: E8 00 00 00 00     call    @__security_check_cookie@4|
67: 89 F8              mov     eax, edi                  |
69: 89 F2              mov     edx, esi                  |
6B: 8D 65 F4           lea     esp, [ebp-0Ch]            |    add     esp, 8
6E: 5E                 pop     esi                       |
6F: 5F                 pop     edi                       |
70: 5B                 pop     ebx                       |    pop     ebx
71: 5D                 pop     ebp                       |
72: C3                 ret                               |    ret

clang generates 51 instructions, 18 more than properly optimised code,
tinkers with a stack cookie, although there is no array allocated on
the stack, and clobbers registers EDI and ESI without necessity.

long long __divdi3(long long dividend, long long divisor)
{
    long long r = divisor >> 63;    // r = divisor < 0 ? -1 : 0
    long long s = dividend >> 63;   // s = dividend < 0 ? -1 : 0
    divisor = (divisor ^ r) - r;    // negate if divisor < 0
    dividend = (dividend ^ s) - s;  // negate if dividend < 0
    s ^= r;                         // sign of quotient
                                    // negate if quotient < 0
    return (__udivmoddi4(dividend, divisor, 0) ^ s) - s;
}

__divdi3:                   # @__divdi3
        push    ebx                        |    push    ebx
        push    edi                        |
        push    esi                        |
        mov     ecx, dword ptr [esp + 28]  |    mov     eax, [esp+20]
        mov     eax, dword ptr [esp + 20]  |
        mov     edi, dword ptr [esp + 24]  |    mov     ecx, [esp+16]
        mov     ebx, dword ptr [esp + 16]  |
        mov     edx, ecx                   |
        mov     esi, eax                   |
        sar     edx, 31                    |    cdq
        sar     esi, 31                    |    mov     ebx, edx
        xor     edi, edx                   |    xor     ecx, edx
        xor     ecx, edx                   |    xor     eax, edx
        sub     edi, edx                   |    sub     ecx, edx
        sbb     ecx, edx                   |    sbb     eax, edx
        xor     ebx, esi                   |
        xor     eax, esi                   |
        sub     ebx, esi                   |
        sbb     eax, esi                   |
        xor     esi, edx                   |
        sub     esp, 12      # WTF?        |
        push    0                          |    push    0
        push    ecx                        |    push    eax
        push    edi                        |    push    ecx
                                           |    mov     eax, [esp+24]
                                           |    mov     ecx, [esp+20]
                                           |    cdq
                                           |    xor     ecx, edx
                                           |    xor     eax, edx
                                           |    sub     ecx, edx
                                           |    sbb     eax, edx
                                           |    xor     ebx, edx
        push    eax                        |    push    eax
        push    ebx                        |    push    ecx
        call    __udivmoddi4               |    call    __udivmoddi4
        add     esp, 32                    |    add     esp, 20
        xor     eax, esi                   |    xor     eax, ebx
        xor     edx, esi                   |    xor     edx, ebx
        sub     eax, esi                   |    sub     eax, ebx
        sbb     edx, esi                   |    sbb     edx, ebx
        pop     esi                        |
        pop     edi                        |
        pop     ebx                        |    pop     ebx
        ret                                |    ret

clang generates 36 instructions, 6 more than properly optimised code,
tinkers with ESP and clobbers registers EDI and ESI without necessity.

stay tuned
Stefan Kanthak

More information about the llvm-dev mailing list