[LLVMdev] trunk's optimizer generates slower code than 3.5

191919 191919 at gmail.com
Fri Feb 13 00:37:04 PST 2015


I submitted the problem report to clang's bugzilla but no one seems to
care so I have to send it to the mailing list.

clang 3.7 svn (trunk 229055 at the time I reported this problem)
generates slower code than 3.5 (Apple LLVM version 6.0
(clang-600.0.56) (based on LLVM 3.5svn)) for the following code.

It is an "8 queens puzzle" solver written as an educational example.
When compiled with either clang 3.5 or clang 3.7 it gives the correct
answer, but the code generated by clang 3.5 runs about 20% faster than
the code generated by 3.6/3.7.

##########################################
# clang 3.5 which comes with Xcode 6.1.1
##########################################
$ clang -O3 -mssse3 -fomit-frame-pointer -fno-stack-protector
-fno-exceptions -o 8 8.c
$ time ./8 9    # 9 queens
352 solutions
./8 9  1.63s user 0.00s system 99% cpu 1.632 total
$ time ./8 10   # 10 queens
724 solutions
./8 10  45.11s user 0.01s system 99% cpu 45.121 total

##########################################
# clang 3.7 svn trunk
##########################################
$ /opt/bin/clang -O3 -mssse3 -fomit-frame-pointer -fno-stack-protector
-fno-exceptions -o 8 8.c
$ time ./8 9    # 9 queens
352 solutions
./8 9  2.07s user 0.00s system 99% cpu 2.078 total
$ time ./8 10   # 10 queens
724 solutions
./8 10  56.63s user 0.02s system 99% cpu 56.650 total

The source code is below. I have also attached the executable files, as
well as the assembly listings (disassembled with IDA) of the code
generated by clang 3.5 and 3.6.

The performance is even worse when compiling as 32-bit code, while
gcc-4.9.2 is not affected.

########## clang-3.5
$ clang -m32 -O3 -fomit-frame-pointer -fno-stack-protector
-fno-exceptions -o 8 8.c
$ time ./8 9
352 solutions
./8 9  1.95s user 0.00s system 99% cpu 1.950 total

########## clang-3.7
$ /opt/bin/clang -m32 -O3 -fomit-frame-pointer -fno-stack-protector
-fno-exceptions -o 8 8.c
$ time ./8 9
352 solutions
./8 9  2.48s user 0.00s system 99% cpu 2.480 total

######### gcc-4.9.2
$ /opt/bin/gcc -m32 -O3 -fomit-frame-pointer -fno-stack-protector
-fno-exceptions -o 8 8.c
$ time ./8 9
352 solutions
./8 9  1.44s user 0.00s system 99% cpu 1.442 total


```
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Check a placement of d queens for conflicts.
 *
 * a[k] holds the position of queen k for k in [0, d).  Two queens i < j
 * conflict when their positions are equal or differ by exactly j - i.
 *
 * Returns 1 when no pair conflicts (always the case for d <= 1),
 * 0 as soon as a conflicting pair is found.
 */
static inline int validate(int* a, int d)
{
        int i, j, x;
        for (i = 0; i < d; ++i)
        {
                /* x tracks the distance j - i between the two queens. */
                for (j = i+1, x = 1; j < d; ++j, ++x)
                {
                        /* Named diff so it no longer shadows the parameter d. */
                        const int diff = a[i] - a[j];
                        if (diff == 0 || diff == -x || diff == x) return 0;
                }
        }
        return 1;
}

/*
 * Count the conflict-free placements of d queens by brute force.
 *
 * Every vector a[0..d-1] with entries in [0, d) is enumerated like an
 * odometer (a[d-1] is the fastest-moving digit) and passed to validate().
 *
 * Returns the number of valid placements, or 0 when d <= 0.
 * Prints a diagnostic and exits the process if the work array cannot be
 * allocated.
 */
static inline int solve(int d)
{
        int r = 0;
        int* a;
        int p;

        /* A non-positive size would make p negative and index before a[0]. */
        if (d <= 0)
                return 0;

        /* d+1 zero-initialised ints: one more than the d entries used. */
        a = (int*) calloc((size_t) d + 1, sizeof(int));
        if (a == NULL)
        {
                perror("calloc");
                exit(EXIT_FAILURE);
        }
        p = d - 1;

        /*
         * The loop below advances the odometer before validating, so the
         * initial all-zero vector has to be checked here.  It is only ever
         * valid for d == 1.
         */
        if (validate(a, d))
        {
                ++r;
        }

        for (;;)
        {
                a[p]++;

                if (a[p] > d-1)
                {
                        /* Carry into the slower digits, right to left. */
                        int bp = p - 1;
                        while (bp >= 0)
                        {
                                a[bp]++;
                                if (a[bp] <= d-1) break;
                                a[bp] = 0;
                                --bp;
                        }
                        /* Carry ran off the front: every vector was visited. */
                        if (bp < 0)
                                break;
                        a[p] = 0;
                }
                if (validate(a, d))
                {
                        ++r;
                }
        }

        free(a);
        return r;
}

/*
 * Usage: 8 <board-size>
 *
 * Parses the board size from argv[1], counts the solutions with solve()
 * and prints the total.
 *
 * Returns 0 on success and -1 on a usage error or an invalid board size.
 */
int main(int argc, char** argv)
{
    char* end = NULL;
    long n;
    int r;

    if (argc != 2)
    {
        fprintf(stderr, "usage: %s <board-size>\n",
                (argc > 0 && argv[0]) ? argv[0] : "8");
        return -1;
    }

    /*
     * Reject empty input, trailing garbage and sizes that do not fit:
     * solve() takes an int and allocates n + 1 elements, so n must stay
     * strictly below INT_MAX.  strtol() saturates at LONG_MAX on overflow,
     * which this range check also rejects.
     */
    n = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || n < 1 || n >= INT_MAX)
    {
        fprintf(stderr, "invalid board size: %s\n", argv[1]);
        return -1;
    }

    r = solve((int) n);
    printf("%d solutions\n", r);

    /* Explicit status: falling off the end of main is only defined in C99+. */
    return 0;
}
```

clang 3.5's result:

```
                ; main() as built by clang 3.5 (IDA listing, x86-64, Intel syntax).
                ; The only calls in this listing are _strtol, _calloc, _free and
                ; _printf, so the bodies of solve() and validate() appear inline.
                ;
                ; Roles in the main loop (loc_100000D30 .. loc_100000DF0), as read
                ; from the code below:
                ;   r15  = d (strtol result)       rax  = a (calloc result)
                ;   edx  = d-1                     r8   = p = d-1, sign-extended
                ;   r11d = cached copy of a[p]     r14d = solution count r
                ;   ebx  = result of the last validate pass (0 or 1)
                ;   var_34 = d-2   var_40 = &a[d-2]   var_48 = &a[1]
                public _main
_main           proc near

var_48          = qword ptr -48h
var_40          = qword ptr -40h
var_34          = dword ptr -34h

                push    rbp
                push    r15
                push    r14
                push    r13
                push    r12
                push    rbx
                sub     rsp, 18h
                ; ebx is copied to eax at loc_100000F29: -1 for the argc != 2 exit.
                mov     ebx, 0FFFFFFFFh
                cmp     edi, 2
                jnz     loc_100000F29
                ; d = strtol(argv[1], NULL, 10)
                mov     rdi, [rsi+8]    ; char *
                xor     r14d, r14d
                xor     esi, esi        ; char **
                mov     edx, 0Ah        ; int
                call    _strtol
                mov     r15, rax
                ; NOTE(review): __mh_execute_header is presumably the immediate
                ; 100000000h (1 << 32) rendered by IDA as a symbol; together with
                ; the shl/sar by 20h this would yield (long)(int)(d + 1) - confirm.
                shl     rax, 20h
                mov     rsi, offset __mh_execute_header
                add     rsi, rax
                sar     rsi, 20h        ; size_t
                mov     edi, 4          ; size_t
                call    _calloc
                lea     edx, [r15-1]
                movsxd  r8, edx
                ; d-2 negative: take the separate loop at loc_100000DFA.
                mov     ecx, r15d
                add     ecx, 0FFFFFFFEh
                js      loc_100000DFA
                ; The mov below does not alter flags, so jle still tests d <= 0.
                test    r15d, r15d
                mov     r11d, [rax+r8*4]
                jle     loc_100000EAE
                ; Stash d-2, &a[d-2] and &a[1] on the stack for the loops below.
                mov     ecx, r15d
                add     ecx, 0FFFFFFFEh
                mov     [rsp+48h+var_34], ecx
                movsxd  rcx, ecx
                lea     rcx, [rax+rcx*4]
                mov     [rsp+48h+var_40], rcx
                lea     rcx, [rax+4]
                mov     [rsp+48h+var_48], rcx
                xor     r14d, r14d
                jmp     short loc_100000D33
; ---------------------------------------------------------------------------
                align 10h

                ; r += result of the validate pass that just finished.
loc_100000D30:                          ; CODE XREF: _main+129 j
                                        ; _main+131 j ...
                add     r14d, ebx

                ; a[p]++ ; if the old value was below d-1 go straight to validate.
                ; rcx / esi are reloaded with &a[d-2] / d-2 for the carry loop.
loc_100000D33:                          ; CODE XREF: _main+92 j
                cmp     r11d, edx
                lea     edi, [r11+1]
                mov     [rax+r8*4], edi
                mov     rcx, [rsp+48h+var_40]
                mov     esi, [rsp+48h+var_34]
                mov     r11d, edi
                jl      short loc_100000D84
                nop     dword ptr [rax+00h]

                ; Carry loop: increment *rcx; if it overflowed, reset it to 0 and
                ; step one element down while the index in esi was still > 0.
loc_100000D50:                          ; CODE XREF: _main+DA j
                mov     edi, [rcx]
                lea     ebp, [rdi+1]
                mov     [rcx], ebp
                cmp     edi, edx
                jl      short loc_100000D71
                mov     dword ptr [rcx], 0
                add     rcx, 0FFFFFFFFFFFFFFFCh
                test    esi, esi
                lea     esi, [rsi-1]
                jg      short loc_100000D50
                jmp     loc_100000F0E
; ---------------------------------------------------------------------------

                ; Carry absorbed: finish if the index went negative, else a[p] = 0.
loc_100000D71:                          ; CODE XREF: _main+C9 j
                test    esi, esi
                js      loc_100000F0E
                mov     dword ptr [rax+r8*4], 0
                xor     r11d, r11d

                ; Inlined validate(): rsi = i = 0, r9 = &a[i+1], r12d = i+1.
                ; d <= 1 skips the loops and counts as valid.
loc_100000D84:                          ; CODE XREF: _main+BA j
                cmp     r15d, 1
                mov     esi, 0
                mov     r9, [rsp+48h+var_48]
                mov     r12d, 1
                jle     short loc_100000DF0

                ; Outer loop: r10d = a[i], ecx = -x, edi = x, r13 = &a[j].
loc_100000D99:                          ; CODE XREF: _main+15E j
                mov     r10d, [rax+rsi*4]
                mov     ecx, 0FFFFFFFFh
                mov     edi, 1
                mov     r13, r9
                nop     word ptr [rax+rax+00h]

                ; Inner loop: ebp = a[i] - a[j]; leave with ebx = 0 when it is
                ; 0, -x or x.  ebx = x + i is then reused as the next j.
loc_100000DB0:                          ; CODE XREF: _main+14F j
                xor     ebx, ebx
                mov     ebp, r10d
                sub     ebp, [r13+0]
                jz      loc_100000D30
                cmp     ecx, ebp
                jz      loc_100000D30
                cmp     edi, ebp
                jz      loc_100000D30
                add     r13, 4
                inc     rdi
                dec     ecx
                mov     ebx, edi
                add     ebx, esi
                cmp     ebx, r15d
                jl      short loc_100000DB0
                inc     r12
                add     r9, 4
                inc     rsi
                cmp     r12d, r15d
                jl      short loc_100000D99

                ; No conflict found: validate result = 1.
loc_100000DF0:                          ; CODE XREF: _main+107 j
                mov     ebx, 1
                jmp     loc_100000D30
; ---------------------------------------------------------------------------

                ; Separate copy of the loop, entered when d-2 is negative.
                ; Here r9d caches a[p] and r8d (= d-1) is the upper bound.
loc_100000DFA:                          ; CODE XREF: _main+5E j
                mov     ecx, [rax+r8*4]
                lea     r9d, [rcx+1]
                mov     [rax+r8*4], r9d
                cmp     ecx, r8d
                jge     loc_100000F0E
                lea     r12, [rax+4]
                xor     r14d, r14d
                db      2Eh
                nop     word ptr [rax+rax+00000000h]

loc_100000E20:                          ; CODE XREF: _main+216 j
                test    r15d, r15d
                setle   cl
                cmp     r15d, 2
                jl      short loc_100000E90
                test    cl, cl
                mov     r13d, 0
                mov     r11, r12
                mov     r10d, 1
                jnz     short loc_100000E90

                ; Same validate loop shape as loc_100000D99 / loc_100000DB0, with
                ; r13 = i, edi = a[i], edx = -x, ecx = x, rsi = &a[j].
loc_100000E3F:                          ; CODE XREF: _main+1F0 j
                mov     edi, [rax+r13*4]
                mov     edx, 0FFFFFFFFh
                mov     ecx, 1
                mov     rsi, r11

loc_100000E50:                          ; CODE XREF: _main+1E1 j
                xor     ebx, ebx
                mov     ebp, edi
                sub     ebp, [rsi]
                jz      short loc_100000E95
                cmp     edx, ebp
                jz      short loc_100000E95
                cmp     ecx, ebp
                jz      short loc_100000E95
                add     rsi, 4
                inc     rcx
                dec     edx
                mov     ebx, ecx
                add     ebx, r13d
                cmp     ebx, r15d
                jl      short loc_100000E50
                inc     r10
                add     r11, 4
                inc     r13
                cmp     r10d, r15d
                jl      short loc_100000E3F
                db      66h, 66h, 66h, 66h, 2Eh
                nop     word ptr [rax+rax+00000000h]

loc_100000E90:                          ; CODE XREF: _main+19A j
                                        ; _main+1AD j
                mov     ebx, 1

                ; r += ebx, then a[p]++ and loop while the old value was below d-1.
loc_100000E95:                          ; CODE XREF: _main+1C6 j
                                        ; _main+1CA j ...
                add     r14d, ebx
                cmp     r9d, r8d
                lea     ecx, [r9+1]
                mov     [rax+r8*4], ecx
                mov     r9d, ecx
                jl      loc_100000E20
                jmp     short loc_100000F0E
; ---------------------------------------------------------------------------

                ; Reached through the jle that follows "test r15d, r15d" above.
                ; This copy has no validate loops: each step just does inc r14d.
loc_100000EAE:                          ; CODE XREF: _main+6B j
                add     r15d, 0FFFFFFFEh
                movsxd  rcx, r15d
                lea     rcx, [rax+rcx*4]
                xor     r14d, r14d
                jmp     short loc_100000EC6
; ---------------------------------------------------------------------------
                align 20h

loc_100000EC0:                          ; CODE XREF: _main+247 j
                                        ; _main+27C j
                inc     r14d
                mov     r11d, ebp

loc_100000EC6:                          ; CODE XREF: _main+22C j
                lea     ebp, [r11+1]
                mov     [rax+r8*4], ebp
                cmp     r11d, r8d
                mov     rsi, rcx
                mov     edi, r15d
                jl      short loc_100000EC0
                nop     dword ptr [rax+00000000h]

loc_100000EE0:                          ; CODE XREF: _main+26A j
                mov     ebp, [rsi]
                lea     ebx, [rbp+1]
                mov     [rsi], ebx
                cmp     ebp, edx
                jl      short loc_100000EFE
                mov     dword ptr [rsi], 0
                add     rsi, 0FFFFFFFFFFFFFFFCh
                test    edi, edi
                lea     edi, [rdi-1]
                jg      short loc_100000EE0
                jmp     short loc_100000F0E
; ---------------------------------------------------------------------------

loc_100000EFE:                          ; CODE XREF: _main+259 j
                test    edi, edi
                js      short loc_100000F0E
                mov     dword ptr [rax+r8*4], 0
                xor     ebp, ebp
                jmp     short loc_100000EC0
; ---------------------------------------------------------------------------

                ; Done: free(a); printf("%d solutions\n", r14d); return value 0.
loc_100000F0E:                          ; CODE XREF: _main+DC j
                                        ; _main+E3 j ...
                mov     rdi, rax        ; void *
                call    _free
                lea     rdi, aDSolutions ; "%d solutions\n"
                xor     ebx, ebx
                xor     eax, eax
                mov     esi, r14d
                call    _printf

loc_100000F29:                          ; CODE XREF: _main+16 j
                mov     eax, ebx
                add     rsp, 18h
                pop     rbx
                pop     r12
                pop     r13
                pop     r14
                pop     r15
                pop     rbp
                retn
_main           endp
```

clang 3.6's result:

```
                ; main() as built by clang 3.6 (IDA listing, x86-64, Intel syntax).
                ; The only calls in this listing are _strtol, _calloc, _free and
                ; _printf, so the bodies of solve() and validate() appear inline.
                ;
                ; Roles in the main loop (loc_100000D10 .. loc_100000E10), as read
                ; from the code below:
                ;   r14  = d (strtol result)       rax  = a (calloc result)
                ;   r11d = d-1                     r12  = p = d-1 (copy in var_40)
                ;   r13d = cached copy of a[p]     ebp  = solution count r
                ;   var_38 = d << 32               var_50 = d sign-extended
                ;   var_58 = d-1 as 64-bit index   var_60 = &a[1]
                ;   var_48 = save slot for the solution count
                ; The inner compare loop reuses ebp for a[i] - a[j], so the count is
                ; stored to var_48 at loc_100000D6B and reloaded at loc_100000D10 on
                ; every pass.
                public _main
_main           proc near

var_60          = qword ptr -60h
var_58          = qword ptr -58h
var_50          = qword ptr -50h
var_48          = qword ptr -48h
var_40          = qword ptr -40h
var_38          = qword ptr -38h

                push    rbp
                push    r15
                push    r14
                push    r13
                push    r12
                push    rbx
                sub     rsp, 38h
                ; ebx is copied to eax at loc_100000F23: -1 for the argc != 2 exit.
                mov     ebx, 0FFFFFFFFh
                cmp     edi, 2
                jnz     loc_100000F23
                ; NOTE(review): __mh_execute_header is presumably the immediate
                ; 100000000h (1 << 32) rendered by IDA as a symbol - confirm.
                mov     rbx, offset __mh_execute_header
                ; d = strtol(argv[1], NULL, 10)
                mov     rdi, [rsi+8]    ; char *
                xor     r13d, r13d
                xor     esi, esi        ; char **
                mov     edx, 0Ah        ; int
                call    _strtol
                mov     r14, rax
                shl     rax, 20h
                mov     [rsp+68h+var_38], rax
                ; rsi = ((d << 32) + rbx) >> 32, the element count for calloc.
                lea     rsi, [rax+rbx]
                sar     rsi, 20h        ; size_t
                mov     edi, 4          ; size_t
                call    _calloc
                lea     r11d, [r14-1]
                movsxd  r12, r11d
                mov     [rsp+68h+var_40], r12
                movsxd  rcx, r14d
                mov     [rsp+68h+var_50], rcx
                ; d-2 negative: take the separate path at loc_100000E1A.
                add     ecx, 0FFFFFFFEh
                js      loc_100000E1A
                mov     ecx, r14d
                add     ecx, 0FFFFFFFEh
                movsxd  rcx, ecx
                inc     rcx
                mov     [rsp+68h+var_58], rcx
                mov     rcx, rax
                add     rcx, 4
                mov     [rsp+68h+var_60], rcx
                xor     ebp, ebp
                jmp     short loc_100000D17
; ---------------------------------------------------------------------------
                align 10h

                ; Reload the count from its stack slot and add the validate
                ; result held in edi.
loc_100000D10:                          ; CODE XREF: _main+15B j
                                        ; _main+163 j ...
                mov     rbp, [rsp+68h+var_48]
                add     ebp, edi

                ; a[p]++ ; if the old value was below d-1 go straight to validate.
loc_100000D17:                          ; CODE XREF: _main+93 j
                cmp     r13d, r11d
                lea     edx, [r13+1]
                mov     [rax+r12*4], edx
                mov     rcx, [rsp+68h+var_58]
                mov     r13d, edx
                jl      short loc_100000D6B
                nop     dword ptr [rax+00h]

                ; Carry loop over a[rcx-1]: increment; on overflow reset to 0 and
                ; step rcx down while it stays positive.
loc_100000D30:                          ; CODE XREF: _main+DE j
                mov     edx, [rax+rcx*4-4]
                lea     esi, [rdx+1]
                mov     [rax+rcx*4-4], esi
                cmp     edx, r11d
                jl      short loc_100000D60
                mov     dword ptr [rax+rcx*4-4], 0
                dec     rcx
                test    rcx, rcx
                jg      short loc_100000D30
                jmp     loc_100000F09
; ---------------------------------------------------------------------------
                align 20h

                ; Carry absorbed: a[p] = 0 and its cached copy is cleared.
loc_100000D60:                          ; CODE XREF: _main+CE j
                mov     dword ptr [rax+r12*4], 0
                xor     r13d, r13d

                ; Inlined validate().  The count is saved to var_48 first.  The
                ; trip test compares d << 32 (var_38) against the 64-bit constant
                ; in rdx rather than comparing the 32-bit d directly.
loc_100000D6B:                          ; CODE XREF: _main+BA j
                mov     [rsp+68h+var_48], rbp
                test    r14d, r14d
                setle   cl
                mov     rdx, offset __mh_execute_header
                lea     rdx, [rdx+1]
                cmp     [rsp+68h+var_38], rdx
                jl      loc_100000E10
                test    cl, cl
                mov     edx, 0
                mov     r10, [rsp+68h+var_60]
                mov     r9d, 1
                jnz     short loc_100000E10

                ; Outer loop: rdx = i, esi = a[i], r15d = -x, r8d = x, rcx = &a[j].
loc_100000DA3:                          ; CODE XREF: _main+195 j
                mov     esi, [rax+rdx*4]
                mov     r15d, 0FFFFFFFFh
                mov     r8d, 1
                mov     rcx, r10
                db      66h, 66h, 2Eh
                nop     dword ptr [rax+rax+00000000h]

                ; Inner loop: ebx = a[j], ebp = a[i] - a[j]; leave with edi = 0 on
                ; a match against x, 0 or -x.  edi = x + i is then the next j.
loc_100000DC0:                          ; CODE XREF: _main+184 j
                mov     ebx, [rcx]
                mov     ebp, esi
                sub     ebp, ebx
                xor     edi, edi
                cmp     r8d, ebp
                jz      loc_100000D10
                cmp     esi, ebx
                jz      loc_100000D10
                cmp     r15d, ebp
                jz      loc_100000D10
                add     rcx, 4
                inc     r8
                dec     r15d
                mov     edi, r8d
                add     edi, edx
                cmp     edi, r14d
                jl      short loc_100000DC0
                inc     r9
                add     r10, 4
                inc     rdx
                ; The outer bound d is compared from its stack slot var_50.
                cmp     r9, [rsp+68h+var_50]
                jl      short loc_100000DA3
                nop     word ptr [rax+rax+00000000h]

                ; No conflict found: validate result = 1.
loc_100000E10:                          ; CODE XREF: _main+119 j
                                        ; _main+131 j
                mov     edi, 1
                jmp     loc_100000D10
; ---------------------------------------------------------------------------

                ; Separate path, entered when d-2 is negative.  In this path the
                ; count is parked in rbx, var_48 holds &a[1] and r15d caches a[p].
loc_100000E1A:                          ; CODE XREF: _main+6E j
                test    r14d, r14d
                jle     loc_100000F00
                mov     dword ptr [rax+r12*4], 1
                xor     ebp, ebp
                cmp     r14d, 2
                jl      loc_100000F09
                mov     rcx, rax
                add     rcx, 4
                mov     [rsp+68h+var_48], rcx
                xor     ebp, ebp
                mov     r15d, 1
                nop     dword ptr [rax+rax+00h]

loc_100000E50:                          ; CODE XREF: _main+288 j
                mov     rbx, rbp
                mov     rcx, offset __mh_execute_header
                cmp     [rsp+68h+var_38], rcx
                mov     edx, 0
                mov     r13, [rsp+68h+var_48]
                mov     r8d, 1
                mov     r9d, 1
                jle     short loc_100000EE0

                ; Same validate loop shape as loc_100000DA3 / loc_100000DC0, with
                ; r12d = a[i], edi = -x, ecx = x, rsi = &a[j], r9d = result.
loc_100000E7A:                          ; CODE XREF: _main+25A j
                mov     r12d, [rax+rdx*4]
                mov     edi, 0FFFFFFFFh
                mov     ecx, 1
                mov     rsi, r13
                nop     dword ptr [rax+rax+00h]

loc_100000E90:                          ; CODE XREF: _main+249 j
                mov     r10d, [rsi]
                mov     ebp, r12d
                sub     ebp, r10d
                xor     r9d, r9d
                cmp     ecx, ebp
                jz      short loc_100000EE0
                cmp     r12d, r10d
                jz      short loc_100000EE0
                cmp     edi, ebp
                jz      short loc_100000EE0
                add     rsi, 4
                inc     rcx
                dec     edi
                mov     ebp, ecx
                add     ebp, edx
                cmp     ebp, r14d
                jl      short loc_100000E90
                inc     r8
                add     r13, 4
                inc     rdx
                cmp     r8, [rsp+68h+var_50]
                jl      short loc_100000E7A
                mov     r9d, 1
                db      66h, 66h, 66h, 66h, 2Eh
                nop     word ptr [rax+rax+00000000h]

                ; count = rbx + r9d; a[p]++ and loop while the old value < d-1.
loc_100000EE0:                          ; CODE XREF: _main+208 j
                                        ; _main+22E j ...
                mov     rbp, rbx
                add     ebp, r9d
                cmp     r15d, r11d
                lea     ecx, [r15+1]
                mov     rdx, [rsp+68h+var_40]
                mov     [rax+rdx*4], ecx
                mov     r15d, ecx
                jl      loc_100000E50
                jmp     short loc_100000F09
; ---------------------------------------------------------------------------

                ; Reached when d <= 0: ebp = max(d-1, 0) without running any loop.
loc_100000F00:                          ; CODE XREF: _main+1AD j
                xor     ebp, ebp
                test    r11d, r11d
                cmovns  ebp, r11d

                ; Done: free(a); printf("%d solutions\n", ebp); return value 0.
loc_100000F09:                          ; CODE XREF: _main+E0 j
                                        ; _main+1C1 j ...
                mov     rdi, rax        ; void *
                call    _free
                lea     rdi, aDSolutions ; "%d solutions\n"
                xor     ebx, ebx
                xor     eax, eax
                mov     esi, ebp
                call    _printf

loc_100000F23:                          ; CODE XREF: _main+16 j
                mov     eax, ebx
                add     rsp, 38h
                pop     rbx
                pop     r12
                pop     r13
                pop     r14
                pop     r15
                pop     rbp
                retn
_main           endp
```

gcc-4.9.2's result:
```

; main() as built by gcc 4.9.2 (IDA listing, x86-64, Intel syntax).  Only
; _strtol, _calloc, _free and _printf are referenced, so the bodies of
; solve() and validate() appear inline.
;
; Roles in the main loop, as read from the code below:
;   ebx  = d                     rdi  = a (kept live until the _free call)
;   r13  = &a[d-1]               ebp  = cached copy of a[d-1]
;   r12  = &a[d-2]               r15d = d-2
;   eax  = d                     r14d = d+1
;   var_3C = solution count      var_48 = &a[d-3]
;   var_40 = save slot for r10d across the inner compare loop
_main           proc near

var_48          = qword ptr -48h
var_40          = dword ptr -40h
var_3C          = dword ptr -3Ch

                ; argc != 2: return -1 without setting up a frame.
                cmp     edi, 2
                jz      short loc_100000D69
                or      eax, 0FFFFFFFFh
                retn
; ---------------------------------------------------------------------------

loc_100000D69:                          ; CODE XREF: _main+3 j
                push    r15
                mov     edx, 0Ah        ; int
                push    r14
                push    r13
                push    r12
                push    rbp
                push    rbx
                sub     rsp, 18h
                ; d = strtol(argv[1], NULL, 10); a = calloc(4, d+1)
                mov     rdi, [rsi+8]    ; char *
                xor     esi, esi        ; char **
                call    _strtol
                mov     edi, 4          ; size_t
                lea     esi, [rax+1]
                mov     r14, rax
                mov     ebx, eax
                lea     r15d, [r14-2]
                movsxd  rsi, esi        ; size_t
                call    _calloc
                mov     [rsp+48h+var_3C], 0
                mov     rdi, rax        ; void *
                lea     eax, [r14-1]
                cdqe
                lea     r13, [rdi+rax*4]
                movsxd  rax, r15d
                mov     ebp, [r13+0]
                shl     rax, 2
                lea     r12, [rdi+rax]
                lea     rax, [rdi+rax-4]
                mov     [rsp+48h+var_48], rax
                mov     eax, r14d
                lea     r14d, [r14+1]
                nop     word ptr [rax+rax+00h]
                nop     word ptr [rax+rax+00h]

                ; Main loop head: a[d-1]++ ; validate directly while d > a[d-1].
                ; Otherwise carry into a[d-2] first (done when d-2 is negative).
loc_100000DE0:                          ; CODE XREF: _main+12B j
                                        ; _main+155 j ...
                add     ebp, 1
                cmp     ebx, ebp
                mov     [r13+0], ebp
                jg      short loc_100000E62
                test    r15d, r15d
                js      short loc_100000E33
                mov     ecx, [r12]
                lea     edx, [rcx+1]
                cmp     ebx, edx
                mov     [r12], edx
                jg      short loc_100000E58
                mov     r8, r12
                mov     rcx, [rsp+48h+var_48]
                mov     esi, r15d
                jmp     short loc_100000E24
; ---------------------------------------------------------------------------
                align 10h

                ; Carry loop body: increment the next lower element (*rcx) and
                ; stop as soon as it stays below d.
loc_100000E10:                          ; CODE XREF: _main+D1 j
                mov     edx, [rcx]
                sub     r8, 4
                sub     rcx, 4
                add     edx, 1
                mov     [rcx+4], edx
                cmp     ebx, edx
                jg      short loc_100000E58

                ; Reset the overflowed element (*r8) to 0; esi counts the index
                ; down and the enumeration ends when it reaches -1.
loc_100000E24:                          ; CODE XREF: _main+A9 j
                sub     esi, 1
                mov     dword ptr [r8], 0
                cmp     esi, 0FFFFFFFFh
                jnz     short loc_100000E10

                ; Done: free(a) (rdi still holds a), then tail-jump to
                ; printf("%d solutions\n", count), so eax on return to the
                ; caller is whatever _printf leaves there.
loc_100000E33:                          ; CODE XREF: _main+8E j
                call    _free
                mov     esi, [rsp+48h+var_3C]
                add     rsp, 18h
                xor     eax, eax
                pop     rbx
                lea     rdi, aDSolutions ; "%d solutions\n"
                pop     rbp
                pop     r12
                pop     r13
                pop     r14
                pop     r15
                jmp     _printf
; ---------------------------------------------------------------------------

                ; Carry absorbed: a[d-1] = 0 and its cached copy is cleared.
loc_100000E58:                          ; CODE XREF: _main+9D j
                                        ; _main+C2 j
                mov     dword ptr [r13+0], 0
                xor     ebp, ebp

                ; Inlined validate().  d <= 0 counts as valid.  r11 = &a[i+2] and
                ; r10d becomes i+1 on each outer pass.
loc_100000E62:                          ; CODE XREF: _main+89 j
                test    ebx, ebx
                jle     loc_100000EE6
                lea     r11, [rdi+8]
                xor     r10d, r10d

                ; Outer loop: valid once i+1 == d.  The first inner iteration
                ; (x = 1) is peeled: a[i] - a[i+1] + 1 <= 2 as an unsigned compare
                ; covers the three cases -1, 0 and 1 with a single branch.
loc_100000E71:                          ; CODE XREF: _main+184 j
                add     r10d, 1
                cmp     r10d, eax
                jz      short loc_100000EE6
                mov     r8d, [r11-8]
                mov     edx, r8d
                sub     edx, [r11-4]
                add     edx, 1
                cmp     edx, 2
                jbe     loc_100000DE0
                ; r9d = (d+1) - (i+1) = d - i is the exclusive bound for x.
                mov     r9d, r14d
                mov     rcx, r11
                mov     edx, 1
                mov     [rsp+48h+var_40], r10d
                sub     r9d, r10d
                jmp     short loc_100000ED3
; ---------------------------------------------------------------------------
                align 10h

                ; Inner loop body: esi = a[i] - a[j]; a conflict (0, -x or x)
                ; jumps back to the main loop head without counting.
loc_100000EB0:                          ; CODE XREF: _main+179 j
                mov     esi, r8d
                sub     esi, [rcx]
                jz      loc_100000DE0
                mov     r10d, esi
                add     rcx, 4
                add     r10d, edx
                jz      loc_100000DE0
                cmp     esi, edx
                jz      loc_100000DE0

                ; x++ ; when x reaches its bound, restore r10d and advance i.
loc_100000ED3:                          ; CODE XREF: _main+144 j
                add     edx, 1
                cmp     edx, r9d
                jnz     short loc_100000EB0
                mov     r10d, [rsp+48h+var_40]
                add     r11, 4
                jmp     short loc_100000E71
; ---------------------------------------------------------------------------

                ; Valid placement: bump the count kept in memory at var_3C.
loc_100000EE6:                          ; CODE XREF: _main+104 j
                                        ; _main+118 j
                add     [rsp+48h+var_3C], 1
                jmp     loc_100000DE0
_main           endp
```

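MSVC 10.0's result (32-bit; built from a variant of the source that
parses the argument with atoi and times the run with GetTickCount):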

```

; 32-bit listing (IDA, Intel syntax) of _main, with the arguments read from
; the stack.  Only sub_4080F0, GetTickCount, _atoi, _calloc, _free and
; _printf are called, so the enumeration and validation loops appear inline.
;
; Roles, as read from the code below:
;   edi = d (atoi result)          eax = a (calloc result) in the main loop
;   ecx = cached copy of a[d-1]    var_7C = solution count
;   var_60 = d-1   var_70 = d-2    var_74 = &a[d]
;   var_80 = MXCSR scratch first, then the starting tick count
;   var_78 = save slot for a during the carry loop
;   var_6C / var_5C = save slots for ecx / edi (d) during validation
;   var_64 / var_68 = save slots for i+1 during the inner compare loop
_main           proc near               ; CODE XREF: ___tmainCRTStartup+106 p

var_80          = dword ptr -80h
var_7C          = dword ptr -7Ch
var_78          = dword ptr -78h
var_74          = dword ptr -74h
var_70          = dword ptr -70h
var_6C          = dword ptr -6Ch
var_68          = dword ptr -68h
var_64          = dword ptr -64h
var_60          = dword ptr -60h
var_5C          = dword ptr -5Ch
argc            = dword ptr  8
argv            = dword ptr  0Ch
envp            = dword ptr  10h

                push    ebp
                mov     ebp, esp
                ; Align esp down to a 128-byte boundary.
                and     esp, 0FFFFFF80h
                push    esi
                push    edi
                push    ebx
                sub     esp, 74h
                ; NOTE(review): sub_4080F0(3) is not shown in this listing;
                ; presumably a runtime start-up helper - confirm.
                push    3
                call    sub_4080F0
                add     esp, 4
                ; Read MXCSR, set bit 15 (8000h) and write it back.
                stmxcsr [esp+80h+var_80]
                or      [esp+80h+var_80], 8000h
                ldmxcsr [esp+80h+var_80]
                ; argc != 2: return -1.
                cmp     [ebp+argc], 2
                jz      short loc_40103A
                mov     eax, 0FFFFFFFFh
                add     esp, 74h
                pop     ebx
                pop     edi
                pop     esi
                mov     esp, ebp
                pop     ebp
                retn
; ---------------------------------------------------------------------------

                ; esi = start tick; d = atoi(argv[1]); a = calloc(4, d+1).
                ; The single "add esp, 0Ch" pops all three pushed arguments.
loc_40103A:                             ; CODE XREF: _main+29 j
                call    ds:GetTickCount
                mov     esi, eax
                mov     eax, [ebp+argv]
                push    dword ptr [eax+4] ; char *
                call    _atoi
                mov     edi, eax
                lea     eax, [edi+1]
                push    eax             ; size_t
                push    4               ; size_t
                call    _calloc
                add     esp, 0Ch
                mov     ecx, [eax+edi*4-4]
                lea     edx, [edi-1]
                mov     [esp+80h+var_6C], ecx
                xor     ebx, ebx
                mov     [esp+80h+var_7C], ebx
                lea     ecx, [eax+edi*4]
                mov     [esp+80h+var_74], ecx
                lea     ecx, [edi-2]
                mov     [esp+80h+var_70], ecx
                mov     [esp+80h+var_60], edx
                mov     [esp+80h+var_80], esi
                mov     ecx, [esp+80h+var_6C]

                ; Main loop head: a[d-1]++ ; validate directly while d > a[d-1].
loc_401087:                             ; CODE XREF: _main+142 j
                                        ; _main+193 j
                mov     edx, [esp+80h+var_60]
                inc     ecx
                mov     [eax+edi*4-4], ecx
                cmp     edi, [eax+edx*4]
                jg      short loc_4010DC
                ; Carry needed.  esi = d-2; a negative value skips the loop.
                mov     esi, [esp+80h+var_70]
                test    esi, esi
                js      short loc_4010CE
                xor     edx, edx
                mov     [esp+80h+var_78], eax
                xor     ebx, ebx
                mov     eax, [esp+80h+var_74]

                ; Carry loop: eax = &a[d], ebx counts down from 0, so
                ; [eax+ebx*4-8] walks a[d-2], a[d-3], ...  edx counts the steps
                ; taken and the loop runs while edx < d-1.
loc_4010A9:                             ; CODE XREF: _main+C8 j
                mov     ecx, [eax+ebx*4-8]
                inc     ecx
                cmp     ecx, edi
                jl      loc_40117A
                inc     edx
                lea     esi, [ebx+edi-3]
                mov     dword ptr [eax+ebx*4-8], 0
                dec     ebx
                cmp     edx, [esp+80h+var_60]
                jb      short loc_4010A9
                mov     eax, [esp+80h+var_78]

                ; A negative index in esi means the carry ran off the front.
                ; Otherwise a[d-1] = 0 and its cached copy is cleared.
loc_4010CE:                             ; CODE XREF: _main+9B j
                                        ; _main+186 j
                test    esi, esi
                jl      short loc_401147
                mov     dword ptr [eax+edi*4-4], 0
                xor     ecx, ecx

                ; Inlined validation.  d <= 0 counts as valid.  ecx and edi are
                ; saved because both registers are reused inside the loops.
loc_4010DC:                             ; CODE XREF: _main+93 j
                test    edi, edi
                jle     short loc_40113E
                mov     [esp+80h+var_6C], ecx
                xor     edx, edx
                mov     [esp+80h+var_5C], edi

                ; Outer loop: edx = i; ecx = ebx = esi = i+1 (ebx is used as j).
loc_4010EA:                             ; CODE XREF: _main+132 j
                lea     ecx, [edx+1]
                mov     ebx, ecx
                mov     esi, ebx
                cmp     ecx, [esp+80h+var_5C]
                jge     short loc_401130
                mov     edx, [eax+edx*4]
                mov     edi, 1
                mov     [esp+80h+var_64], esi
                mov     [esp+80h+var_68], ecx

                ; Inner loop: edx = a[i], esi = a[j], edi = x.  Conflict when
                ; a[i] == a[j], or when a[j] - a[i] equals -x or x.
loc_401107:                             ; CODE XREF: _main+122 j
                mov     esi, [eax+ebx*4]
                cmp     edx, esi
                jz      short loc_40118B
                sub     esi, edx
                mov     ecx, esi
                neg     ecx
                cmp     edi, ecx
                jz      short loc_40118B
                cmp     esi, edi
                jz      short loc_40118B
                inc     ebx
                inc     edi
                cmp     ebx, [esp+80h+var_5C]
                jl      short loc_401107
                mov     ecx, [esp+80h+var_68]
                mov     esi, [esp+80h+var_64]
                cmp     ecx, [esp+80h+var_5C]

                ; mov does not alter flags: jl still tests (i+1) < d from the
                ; preceding cmp on either path into this label.
loc_401130:                             ; CODE XREF: _main+F5 j
                mov     edx, esi
                jl      short loc_4010EA
                xchg    ax, ax
                mov     ecx, [esp+80h+var_6C]
                mov     edi, [esp+80h+var_5C]

                ; Valid placement: bump the count kept in memory at var_7C.
loc_40113E:                             ; CODE XREF: _main+DE j
                inc     [esp+80h+var_7C]
                jmp     loc_401087
; ---------------------------------------------------------------------------

                ; Done: free(a), then
                ; printf("%d solutions in %d msecs.\n", count, ticks elapsed).
loc_401147:                             ; CODE XREF: _main+D0 j
                mov     ebx, [esp+80h+var_7C]
                mov     esi, [esp+80h+var_80]
                push    eax             ; void *
                call    _free
                add     esp, 4
                call    ds:GetTickCount
                sub     eax, esi
                push    eax
                push    ebx
                push    offset aDSolutionsInDM ; "%d solutions in %d msecs.\n"
                call    _printf
                xor     eax, eax
                ; 80h = the 74h of locals plus the 0Ch of printf arguments.
                add     esp, 80h
                pop     ebx
                pop     edi
                pop     esi
                mov     esp, ebp
                pop     ebp
                retn
; ---------------------------------------------------------------------------

                ; Carry absorbed inside the loop: store the incremented element
                ; and restore eax = a before rejoining at loc_4010CE.
loc_40117A:                             ; CODE XREF: _main+B0 j
                mov     edx, [esp+80h+var_74]
                mov     eax, [esp+80h+var_78]
                mov     [edx+ebx*4-8], ecx
                jmp     loc_4010CE
; ---------------------------------------------------------------------------

                ; Conflict found: restore ecx / edi and continue without counting.
loc_40118B:                             ; CODE XREF: _main+10C j
                                        ; _main+116 j ...
                mov     ecx, [esp+80h+var_6C]
                mov     edi, [esp+80h+var_5C]
                jmp     loc_401087
_main           endp
```



More information about the llvm-dev mailing list