[LLVMdev] trunk's optimizer generates slower code than 3.5
Jack Howarth
howarth.mailing.lists at gmail.com
Fri Feb 13 11:47:29 PST 2015
Also confirmed with the llvm 3.5.1 release and the llvm 3.6 release
branch on x86_64-apple-darwin14...
% clang-3.5 -O3 -mssse3 -fomit-frame-pointer -fno-stack-protector
-fno-exceptions -o 8 8.c
% time ./8 9
352 solutions
3.603u 0.002s 0:03.60 100.0% 0+0k 0+0io 2pf+0w
% time ./8 10
724 solutions
104.217u 0.059s 1:44.30 99.9% 0+0k 0+0io 2pf+0w
% clang-3.6 -O3 -mssse3 -fomit-frame-pointer -fno-stack-protector
-fno-exceptions -o 8 8.c
% time ./8 9
352 solutions
4.050u 0.001s 0:04.05 100.0% 0+0k 0+0io 2pf+0w
% time ./8 10
724 solutions
114.808u 0.041s 1:54.86 99.9% 0+0k 0+0io 2pf+0w
On Fri, Feb 13, 2015 at 3:37 AM, 191919 <191919 at gmail.com> wrote:
> I submitted the problem report to clang's bugzilla but no one seems to
> care so I have to send it to the mailing list.
>
> clang 3.7 svn (trunk 229055 at the time I reported this problem)
> generates slower code than 3.5 (Apple LLVM version 6.0
> (clang-600.0.56) (based on LLVM 3.5svn)) for the following code.
>
> It is an "8 queens puzzle" solver written as an educational example.
> When compiled by either clang 3.5 or 3.7 it gives the correct answer,
> but clang 3.5 generates code which runs 20% faster than 3.6/3.7.
>
> ##########################################
> # clang 3.5 which comes with Xcode 6.1.1
> ##########################################
> $ clang -O3 -mssse3 -fomit-frame-pointer -fno-stack-protector
> -fno-exceptions -o 8 8.c
> $ time ./8 9 # 9 queens
> 352 solutions
> ./8 9 1.63s user 0.00s system 99% cpu 1.632 total
> $ time ./8 10 # 10 queens
> 724 solutions
> ./8 10 45.11s user 0.01s system 99% cpu 45.121 total
>
> ##########################################
> # clang 3.7 svn trunk
> ##########################################
> $ /opt/bin/clang -O3 -mssse3 -fomit-frame-pointer -fno-stack-protector
> -fno-exceptions -o 8 8.c
> $ time ./8 9 # 9 queens
> 352 solutions
> ./8 9 2.07s user 0.00s system 99% cpu 2.078 total
> $ time ./8 10 # 10 queens
> 724 solutions
> ./8 10 56.63s user 0.02s system 99% cpu 56.650 total
>
> The source code is below. I have also attached the executable files and
> the assembly listings produced by IDA for clang 3.5 and 3.6.
>
> The performance is even worse when compiling as 32-bit code while
> gcc-4.9.2 is not affected.
>
> ########## clang-3.5
> $ clang -m32 -O3 -fomit-frame-pointer -fno-stack-protector
> -fno-exceptions -o 8 8.c
> $ time ./8 9
> 352 solutions
> ./8 9 1.95s user 0.00s system 99% cpu 1.950 total
>
> ########## clang-3.7
> $ /opt/bin/clang -m32 -O3 -fomit-frame-pointer -fno-stack-protector
> -fno-exceptions -o 8 8.c
> $ time ./8 9
> 352 solutions
> ./8 9 2.48s user 0.00s system 99% cpu 2.480 total
>
> ######### gcc-4.9.2
> $ /opt/bin/gcc -m32 -O3 -fomit-frame-pointer -fno-stack-protector
> -fno-exceptions -o 8 8.c
> $ time ./8 9
> 352 solutions
> ./8 9 1.44s user 0.00s system 99% cpu 1.442 total
>
>
> ```
> #include <stdio.h>
> #include <stdlib.h>
>
> static inline int validate(int* a, int d)
> {
> int i, j, x;
> for (i = 0; i < d; ++i)
> {
> for (j = i+1, x = 1; j < d; ++j, ++x)
> {
> const int d = a[i] - a[j];
> if (d == 0 || d == -x || d == x) return 0;
> }
> }
> return 1;
> }
>
> static inline int solve(int d)
> {
> int r = 0;
> int* a = (int*) calloc(sizeof(int), d+1);
> int p = d - 1;
>
> for (;;)
> {
> a[p]++;
>
> if (a[p] > d-1)
> {
> int bp = p - 1;
> while (bp >= 0)
> {
> a[bp]++;
> if (a[bp] <= d-1) break;
> a[bp] = 0;
> --bp;
> }
> if (bp < 0)
> break;
> a[p] = 0;
> }
> if (validate(a, d))
> {
> ++r;
> }
> }
>
> free(a);
> return r;
> }
>
> int main(int argc, char** argv)
> {
> if (argc != 2) return -1;
> int r = solve((int) strtol(argv[1], NULL, 10));
> printf("%d solutions\n", r);
> }
> ```
>
> clang 3.5's result:
>
> ```
> public _main
> _main proc near
>
> var_48 = qword ptr -48h
> var_40 = qword ptr -40h
> var_34 = dword ptr -34h
>
> push rbp
> push r15
> push r14
> push r13
> push r12
> push rbx
> sub rsp, 18h
> mov ebx, 0FFFFFFFFh
> cmp edi, 2
> jnz loc_100000F29
> mov rdi, [rsi+8] ; char *
> xor r14d, r14d
> xor esi, esi ; char **
> mov edx, 0Ah ; int
> call _strtol
> mov r15, rax
> shl rax, 20h
> mov rsi, offset __mh_execute_header
> add rsi, rax
> sar rsi, 20h ; size_t
> mov edi, 4 ; size_t
> call _calloc
> lea edx, [r15-1]
> movsxd r8, edx
> mov ecx, r15d
> add ecx, 0FFFFFFFEh
> js loc_100000DFA
> test r15d, r15d
> mov r11d, [rax+r8*4]
> jle loc_100000EAE
> mov ecx, r15d
> add ecx, 0FFFFFFFEh
> mov [rsp+48h+var_34], ecx
> movsxd rcx, ecx
> lea rcx, [rax+rcx*4]
> mov [rsp+48h+var_40], rcx
> lea rcx, [rax+4]
> mov [rsp+48h+var_48], rcx
> xor r14d, r14d
> jmp short loc_100000D33
> ; ---------------------------------------------------------------------------
> align 10h
>
> loc_100000D30: ; CODE XREF: _main+129 j
> ; _main+131 j ...
> add r14d, ebx
>
> loc_100000D33: ; CODE XREF: _main+92 j
> cmp r11d, edx
> lea edi, [r11+1]
> mov [rax+r8*4], edi
> mov rcx, [rsp+48h+var_40]
> mov esi, [rsp+48h+var_34]
> mov r11d, edi
> jl short loc_100000D84
> nop dword ptr [rax+00h]
>
> loc_100000D50: ; CODE XREF: _main+DA j
> mov edi, [rcx]
> lea ebp, [rdi+1]
> mov [rcx], ebp
> cmp edi, edx
> jl short loc_100000D71
> mov dword ptr [rcx], 0
> add rcx, 0FFFFFFFFFFFFFFFCh
> test esi, esi
> lea esi, [rsi-1]
> jg short loc_100000D50
> jmp loc_100000F0E
> ; ---------------------------------------------------------------------------
>
> loc_100000D71: ; CODE XREF: _main+C9 j
> test esi, esi
> js loc_100000F0E
> mov dword ptr [rax+r8*4], 0
> xor r11d, r11d
>
> loc_100000D84: ; CODE XREF: _main+BA j
> cmp r15d, 1
> mov esi, 0
> mov r9, [rsp+48h+var_48]
> mov r12d, 1
> jle short loc_100000DF0
>
> loc_100000D99: ; CODE XREF: _main+15E j
> mov r10d, [rax+rsi*4]
> mov ecx, 0FFFFFFFFh
> mov edi, 1
> mov r13, r9
> nop word ptr [rax+rax+00h]
>
> loc_100000DB0: ; CODE XREF: _main+14F j
> xor ebx, ebx
> mov ebp, r10d
> sub ebp, [r13+0]
> jz loc_100000D30
> cmp ecx, ebp
> jz loc_100000D30
> cmp edi, ebp
> jz loc_100000D30
> add r13, 4
> inc rdi
> dec ecx
> mov ebx, edi
> add ebx, esi
> cmp ebx, r15d
> jl short loc_100000DB0
> inc r12
> add r9, 4
> inc rsi
> cmp r12d, r15d
> jl short loc_100000D99
>
> loc_100000DF0: ; CODE XREF: _main+107 j
> mov ebx, 1
> jmp loc_100000D30
> ; ---------------------------------------------------------------------------
>
> loc_100000DFA: ; CODE XREF: _main+5E j
> mov ecx, [rax+r8*4]
> lea r9d, [rcx+1]
> mov [rax+r8*4], r9d
> cmp ecx, r8d
> jge loc_100000F0E
> lea r12, [rax+4]
> xor r14d, r14d
> db 2Eh
> nop word ptr [rax+rax+00000000h]
>
> loc_100000E20: ; CODE XREF: _main+216 j
> test r15d, r15d
> setle cl
> cmp r15d, 2
> jl short loc_100000E90
> test cl, cl
> mov r13d, 0
> mov r11, r12
> mov r10d, 1
> jnz short loc_100000E90
>
> loc_100000E3F: ; CODE XREF: _main+1F0 j
> mov edi, [rax+r13*4]
> mov edx, 0FFFFFFFFh
> mov ecx, 1
> mov rsi, r11
>
> loc_100000E50: ; CODE XREF: _main+1E1 j
> xor ebx, ebx
> mov ebp, edi
> sub ebp, [rsi]
> jz short loc_100000E95
> cmp edx, ebp
> jz short loc_100000E95
> cmp ecx, ebp
> jz short loc_100000E95
> add rsi, 4
> inc rcx
> dec edx
> mov ebx, ecx
> add ebx, r13d
> cmp ebx, r15d
> jl short loc_100000E50
> inc r10
> add r11, 4
> inc r13
> cmp r10d, r15d
> jl short loc_100000E3F
> db 66h, 66h, 66h, 66h, 2Eh
> nop word ptr [rax+rax+00000000h]
>
> loc_100000E90: ; CODE XREF: _main+19A j
> ; _main+1AD j
> mov ebx, 1
>
> loc_100000E95: ; CODE XREF: _main+1C6 j
> ; _main+1CA j ...
> add r14d, ebx
> cmp r9d, r8d
> lea ecx, [r9+1]
> mov [rax+r8*4], ecx
> mov r9d, ecx
> jl loc_100000E20
> jmp short loc_100000F0E
> ; ---------------------------------------------------------------------------
>
> loc_100000EAE: ; CODE XREF: _main+6B j
> add r15d, 0FFFFFFFEh
> movsxd rcx, r15d
> lea rcx, [rax+rcx*4]
> xor r14d, r14d
> jmp short loc_100000EC6
> ; ---------------------------------------------------------------------------
> align 20h
>
> loc_100000EC0: ; CODE XREF: _main+247 j
> ; _main+27C j
> inc r14d
> mov r11d, ebp
>
> loc_100000EC6: ; CODE XREF: _main+22C j
> lea ebp, [r11+1]
> mov [rax+r8*4], ebp
> cmp r11d, r8d
> mov rsi, rcx
> mov edi, r15d
> jl short loc_100000EC0
> nop dword ptr [rax+00000000h]
>
> loc_100000EE0: ; CODE XREF: _main+26A j
> mov ebp, [rsi]
> lea ebx, [rbp+1]
> mov [rsi], ebx
> cmp ebp, edx
> jl short loc_100000EFE
> mov dword ptr [rsi], 0
> add rsi, 0FFFFFFFFFFFFFFFCh
> test edi, edi
> lea edi, [rdi-1]
> jg short loc_100000EE0
> jmp short loc_100000F0E
> ; ---------------------------------------------------------------------------
>
> loc_100000EFE: ; CODE XREF: _main+259 j
> test edi, edi
> js short loc_100000F0E
> mov dword ptr [rax+r8*4], 0
> xor ebp, ebp
> jmp short loc_100000EC0
> ; ---------------------------------------------------------------------------
>
> loc_100000F0E: ; CODE XREF: _main+DC j
> ; _main+E3 j ...
> mov rdi, rax ; void *
> call _free
> lea rdi, aDSolutions ; "%d solutions\n"
> xor ebx, ebx
> xor eax, eax
> mov esi, r14d
> call _printf
>
> loc_100000F29: ; CODE XREF: _main+16 j
> mov eax, ebx
> add rsp, 18h
> pop rbx
> pop r12
> pop r13
> pop r14
> pop r15
> pop rbp
> retn
> _main endp
> ```
>
> clang 3.6's result:
>
> ```
> public _main
> _main proc near
>
> var_60 = qword ptr -60h
> var_58 = qword ptr -58h
> var_50 = qword ptr -50h
> var_48 = qword ptr -48h
> var_40 = qword ptr -40h
> var_38 = qword ptr -38h
>
> push rbp
> push r15
> push r14
> push r13
> push r12
> push rbx
> sub rsp, 38h
> mov ebx, 0FFFFFFFFh
> cmp edi, 2
> jnz loc_100000F23
> mov rbx, offset __mh_execute_header
> mov rdi, [rsi+8] ; char *
> xor r13d, r13d
> xor esi, esi ; char **
> mov edx, 0Ah ; int
> call _strtol
> mov r14, rax
> shl rax, 20h
> mov [rsp+68h+var_38], rax
> lea rsi, [rax+rbx]
> sar rsi, 20h ; size_t
> mov edi, 4 ; size_t
> call _calloc
> lea r11d, [r14-1]
> movsxd r12, r11d
> mov [rsp+68h+var_40], r12
> movsxd rcx, r14d
> mov [rsp+68h+var_50], rcx
> add ecx, 0FFFFFFFEh
> js loc_100000E1A
> mov ecx, r14d
> add ecx, 0FFFFFFFEh
> movsxd rcx, ecx
> inc rcx
> mov [rsp+68h+var_58], rcx
> mov rcx, rax
> add rcx, 4
> mov [rsp+68h+var_60], rcx
> xor ebp, ebp
> jmp short loc_100000D17
> ; ---------------------------------------------------------------------------
> align 10h
>
> loc_100000D10: ; CODE XREF: _main+15B j
> ; _main+163 j ...
> mov rbp, [rsp+68h+var_48]
> add ebp, edi
>
> loc_100000D17: ; CODE XREF: _main+93 j
> cmp r13d, r11d
> lea edx, [r13+1]
> mov [rax+r12*4], edx
> mov rcx, [rsp+68h+var_58]
> mov r13d, edx
> jl short loc_100000D6B
> nop dword ptr [rax+00h]
>
> loc_100000D30: ; CODE XREF: _main+DE j
> mov edx, [rax+rcx*4-4]
> lea esi, [rdx+1]
> mov [rax+rcx*4-4], esi
> cmp edx, r11d
> jl short loc_100000D60
> mov dword ptr [rax+rcx*4-4], 0
> dec rcx
> test rcx, rcx
> jg short loc_100000D30
> jmp loc_100000F09
> ; ---------------------------------------------------------------------------
> align 20h
>
> loc_100000D60: ; CODE XREF: _main+CE j
> mov dword ptr [rax+r12*4], 0
> xor r13d, r13d
>
> loc_100000D6B: ; CODE XREF: _main+BA j
> mov [rsp+68h+var_48], rbp
> test r14d, r14d
> setle cl
> mov rdx, offset __mh_execute_header
> lea rdx, [rdx+1]
> cmp [rsp+68h+var_38], rdx
> jl loc_100000E10
> test cl, cl
> mov edx, 0
> mov r10, [rsp+68h+var_60]
> mov r9d, 1
> jnz short loc_100000E10
>
> loc_100000DA3: ; CODE XREF: _main+195 j
> mov esi, [rax+rdx*4]
> mov r15d, 0FFFFFFFFh
> mov r8d, 1
> mov rcx, r10
> db 66h, 66h, 2Eh
> nop dword ptr [rax+rax+00000000h]
>
> loc_100000DC0: ; CODE XREF: _main+184 j
> mov ebx, [rcx]
> mov ebp, esi
> sub ebp, ebx
> xor edi, edi
> cmp r8d, ebp
> jz loc_100000D10
> cmp esi, ebx
> jz loc_100000D10
> cmp r15d, ebp
> jz loc_100000D10
> add rcx, 4
> inc r8
> dec r15d
> mov edi, r8d
> add edi, edx
> cmp edi, r14d
> jl short loc_100000DC0
> inc r9
> add r10, 4
> inc rdx
> cmp r9, [rsp+68h+var_50]
> jl short loc_100000DA3
> nop word ptr [rax+rax+00000000h]
>
> loc_100000E10: ; CODE XREF: _main+119 j
> ; _main+131 j
> mov edi, 1
> jmp loc_100000D10
> ; ---------------------------------------------------------------------------
>
> loc_100000E1A: ; CODE XREF: _main+6E j
> test r14d, r14d
> jle loc_100000F00
> mov dword ptr [rax+r12*4], 1
> xor ebp, ebp
> cmp r14d, 2
> jl loc_100000F09
> mov rcx, rax
> add rcx, 4
> mov [rsp+68h+var_48], rcx
> xor ebp, ebp
> mov r15d, 1
> nop dword ptr [rax+rax+00h]
>
> loc_100000E50: ; CODE XREF: _main+288 j
> mov rbx, rbp
> mov rcx, offset __mh_execute_header
> cmp [rsp+68h+var_38], rcx
> mov edx, 0
> mov r13, [rsp+68h+var_48]
> mov r8d, 1
> mov r9d, 1
> jle short loc_100000EE0
>
> loc_100000E7A: ; CODE XREF: _main+25A j
> mov r12d, [rax+rdx*4]
> mov edi, 0FFFFFFFFh
> mov ecx, 1
> mov rsi, r13
> nop dword ptr [rax+rax+00h]
>
> loc_100000E90: ; CODE XREF: _main+249 j
> mov r10d, [rsi]
> mov ebp, r12d
> sub ebp, r10d
> xor r9d, r9d
> cmp ecx, ebp
> jz short loc_100000EE0
> cmp r12d, r10d
> jz short loc_100000EE0
> cmp edi, ebp
> jz short loc_100000EE0
> add rsi, 4
> inc rcx
> dec edi
> mov ebp, ecx
> add ebp, edx
> cmp ebp, r14d
> jl short loc_100000E90
> inc r8
> add r13, 4
> inc rdx
> cmp r8, [rsp+68h+var_50]
> jl short loc_100000E7A
> mov r9d, 1
> db 66h, 66h, 66h, 66h, 2Eh
> nop word ptr [rax+rax+00000000h]
>
> loc_100000EE0: ; CODE XREF: _main+208 j
> ; _main+22E j ...
> mov rbp, rbx
> add ebp, r9d
> cmp r15d, r11d
> lea ecx, [r15+1]
> mov rdx, [rsp+68h+var_40]
> mov [rax+rdx*4], ecx
> mov r15d, ecx
> jl loc_100000E50
> jmp short loc_100000F09
> ; ---------------------------------------------------------------------------
>
> loc_100000F00: ; CODE XREF: _main+1AD j
> xor ebp, ebp
> test r11d, r11d
> cmovns ebp, r11d
>
> loc_100000F09: ; CODE XREF: _main+E0 j
> ; _main+1C1 j ...
> mov rdi, rax ; void *
> call _free
> lea rdi, aDSolutions ; "%d solutions\n"
> xor ebx, ebx
> xor eax, eax
> mov esi, ebp
> call _printf
>
> loc_100000F23: ; CODE XREF: _main+16 j
> mov eax, ebx
> add rsp, 38h
> pop rbx
> pop r12
> pop r13
> pop r14
> pop r15
> pop rbp
> retn
> _main endp
> ```
>
> gcc-4.9.2's result:
> ```
>
> _main proc near
>
> var_48 = qword ptr -48h
> var_40 = dword ptr -40h
> var_3C = dword ptr -3Ch
>
> cmp edi, 2
> jz short loc_100000D69
> or eax, 0FFFFFFFFh
> retn
> ; ---------------------------------------------------------------------------
>
> loc_100000D69: ; CODE XREF: _main+3 j
> push r15
> mov edx, 0Ah ; int
> push r14
> push r13
> push r12
> push rbp
> push rbx
> sub rsp, 18h
> mov rdi, [rsi+8] ; char *
> xor esi, esi ; char **
> call _strtol
> mov edi, 4 ; size_t
> lea esi, [rax+1]
> mov r14, rax
> mov ebx, eax
> lea r15d, [r14-2]
> movsxd rsi, esi ; size_t
> call _calloc
> mov [rsp+48h+var_3C], 0
> mov rdi, rax ; void *
> lea eax, [r14-1]
> cdqe
> lea r13, [rdi+rax*4]
> movsxd rax, r15d
> mov ebp, [r13+0]
> shl rax, 2
> lea r12, [rdi+rax]
> lea rax, [rdi+rax-4]
> mov [rsp+48h+var_48], rax
> mov eax, r14d
> lea r14d, [r14+1]
> nop word ptr [rax+rax+00h]
> nop word ptr [rax+rax+00h]
>
> loc_100000DE0: ; CODE XREF: _main+12B j
> ; _main+155 j ...
> add ebp, 1
> cmp ebx, ebp
> mov [r13+0], ebp
> jg short loc_100000E62
> test r15d, r15d
> js short loc_100000E33
> mov ecx, [r12]
> lea edx, [rcx+1]
> cmp ebx, edx
> mov [r12], edx
> jg short loc_100000E58
> mov r8, r12
> mov rcx, [rsp+48h+var_48]
> mov esi, r15d
> jmp short loc_100000E24
> ; ---------------------------------------------------------------------------
> align 10h
>
> loc_100000E10: ; CODE XREF: _main+D1 j
> mov edx, [rcx]
> sub r8, 4
> sub rcx, 4
> add edx, 1
> mov [rcx+4], edx
> cmp ebx, edx
> jg short loc_100000E58
>
> loc_100000E24: ; CODE XREF: _main+A9 j
> sub esi, 1
> mov dword ptr [r8], 0
> cmp esi, 0FFFFFFFFh
> jnz short loc_100000E10
>
> loc_100000E33: ; CODE XREF: _main+8E j
> call _free
> mov esi, [rsp+48h+var_3C]
> add rsp, 18h
> xor eax, eax
> pop rbx
> lea rdi, aDSolutions ; "%d solutions\n"
> pop rbp
> pop r12
> pop r13
> pop r14
> pop r15
> jmp _printf
> ; ---------------------------------------------------------------------------
>
> loc_100000E58: ; CODE XREF: _main+9D j
> ; _main+C2 j
> mov dword ptr [r13+0], 0
> xor ebp, ebp
>
> loc_100000E62: ; CODE XREF: _main+89 j
> test ebx, ebx
> jle loc_100000EE6
> lea r11, [rdi+8]
> xor r10d, r10d
>
> loc_100000E71: ; CODE XREF: _main+184 j
> add r10d, 1
> cmp r10d, eax
> jz short loc_100000EE6
> mov r8d, [r11-8]
> mov edx, r8d
> sub edx, [r11-4]
> add edx, 1
> cmp edx, 2
> jbe loc_100000DE0
> mov r9d, r14d
> mov rcx, r11
> mov edx, 1
> mov [rsp+48h+var_40], r10d
> sub r9d, r10d
> jmp short loc_100000ED3
> ; ---------------------------------------------------------------------------
> align 10h
>
> loc_100000EB0: ; CODE XREF: _main+179 j
> mov esi, r8d
> sub esi, [rcx]
> jz loc_100000DE0
> mov r10d, esi
> add rcx, 4
> add r10d, edx
> jz loc_100000DE0
> cmp esi, edx
> jz loc_100000DE0
>
> loc_100000ED3: ; CODE XREF: _main+144 j
> add edx, 1
> cmp edx, r9d
> jnz short loc_100000EB0
> mov r10d, [rsp+48h+var_40]
> add r11, 4
> jmp short loc_100000E71
> ; ---------------------------------------------------------------------------
>
> loc_100000EE6: ; CODE XREF: _main+104 j
> ; _main+118 j
> add [rsp+48h+var_3C], 1
> jmp loc_100000DE0
> _main endp
> ```
>
> MSVC 10.0's result:
>
> ```
>
> _main proc near ; CODE XREF: ___tmainCRTStartup+106 p
>
> var_80 = dword ptr -80h
> var_7C = dword ptr -7Ch
> var_78 = dword ptr -78h
> var_74 = dword ptr -74h
> var_70 = dword ptr -70h
> var_6C = dword ptr -6Ch
> var_68 = dword ptr -68h
> var_64 = dword ptr -64h
> var_60 = dword ptr -60h
> var_5C = dword ptr -5Ch
> argc = dword ptr 8
> argv = dword ptr 0Ch
> envp = dword ptr 10h
>
> push ebp
> mov ebp, esp
> and esp, 0FFFFFF80h
> push esi
> push edi
> push ebx
> sub esp, 74h
> push 3
> call sub_4080F0
> add esp, 4
> stmxcsr [esp+80h+var_80]
> or [esp+80h+var_80], 8000h
> ldmxcsr [esp+80h+var_80]
> cmp [ebp+argc], 2
> jz short loc_40103A
> mov eax, 0FFFFFFFFh
> add esp, 74h
> pop ebx
> pop edi
> pop esi
> mov esp, ebp
> pop ebp
> retn
> ; ---------------------------------------------------------------------------
>
> loc_40103A: ; CODE XREF: _main+29 j
> call ds:GetTickCount
> mov esi, eax
> mov eax, [ebp+argv]
> push dword ptr [eax+4] ; char *
> call _atoi
> mov edi, eax
> lea eax, [edi+1]
> push eax ; size_t
> push 4 ; size_t
> call _calloc
> add esp, 0Ch
> mov ecx, [eax+edi*4-4]
> lea edx, [edi-1]
> mov [esp+80h+var_6C], ecx
> xor ebx, ebx
> mov [esp+80h+var_7C], ebx
> lea ecx, [eax+edi*4]
> mov [esp+80h+var_74], ecx
> lea ecx, [edi-2]
> mov [esp+80h+var_70], ecx
> mov [esp+80h+var_60], edx
> mov [esp+80h+var_80], esi
> mov ecx, [esp+80h+var_6C]
>
> loc_401087: ; CODE XREF: _main+142 j
> ; _main+193 j
> mov edx, [esp+80h+var_60]
> inc ecx
> mov [eax+edi*4-4], ecx
> cmp edi, [eax+edx*4]
> jg short loc_4010DC
> mov esi, [esp+80h+var_70]
> test esi, esi
> js short loc_4010CE
> xor edx, edx
> mov [esp+80h+var_78], eax
> xor ebx, ebx
> mov eax, [esp+80h+var_74]
>
> loc_4010A9: ; CODE XREF: _main+C8 j
> mov ecx, [eax+ebx*4-8]
> inc ecx
> cmp ecx, edi
> jl loc_40117A
> inc edx
> lea esi, [ebx+edi-3]
> mov dword ptr [eax+ebx*4-8], 0
> dec ebx
> cmp edx, [esp+80h+var_60]
> jb short loc_4010A9
> mov eax, [esp+80h+var_78]
>
> loc_4010CE: ; CODE XREF: _main+9B j
> ; _main+186 j
> test esi, esi
> jl short loc_401147
> mov dword ptr [eax+edi*4-4], 0
> xor ecx, ecx
>
> loc_4010DC: ; CODE XREF: _main+93 j
> test edi, edi
> jle short loc_40113E
> mov [esp+80h+var_6C], ecx
> xor edx, edx
> mov [esp+80h+var_5C], edi
>
> loc_4010EA: ; CODE XREF: _main+132 j
> lea ecx, [edx+1]
> mov ebx, ecx
> mov esi, ebx
> cmp ecx, [esp+80h+var_5C]
> jge short loc_401130
> mov edx, [eax+edx*4]
> mov edi, 1
> mov [esp+80h+var_64], esi
> mov [esp+80h+var_68], ecx
>
> loc_401107: ; CODE XREF: _main+122 j
> mov esi, [eax+ebx*4]
> cmp edx, esi
> jz short loc_40118B
> sub esi, edx
> mov ecx, esi
> neg ecx
> cmp edi, ecx
> jz short loc_40118B
> cmp esi, edi
> jz short loc_40118B
> inc ebx
> inc edi
> cmp ebx, [esp+80h+var_5C]
> jl short loc_401107
> mov ecx, [esp+80h+var_68]
> mov esi, [esp+80h+var_64]
> cmp ecx, [esp+80h+var_5C]
>
> loc_401130: ; CODE XREF: _main+F5 j
> mov edx, esi
> jl short loc_4010EA
> xchg ax, ax
> mov ecx, [esp+80h+var_6C]
> mov edi, [esp+80h+var_5C]
>
> loc_40113E: ; CODE XREF: _main+DE j
> inc [esp+80h+var_7C]
> jmp loc_401087
> ; ---------------------------------------------------------------------------
>
> loc_401147: ; CODE XREF: _main+D0 j
> mov ebx, [esp+80h+var_7C]
> mov esi, [esp+80h+var_80]
> push eax ; void *
> call _free
> add esp, 4
> call ds:GetTickCount
> sub eax, esi
> push eax
> push ebx
> push offset aDSolutionsInDM ; "%d solutions in %d msecs.\n"
> call _printf
> xor eax, eax
> add esp, 80h
> pop ebx
> pop edi
> pop esi
> mov esp, ebp
> pop ebp
> retn
> ; ---------------------------------------------------------------------------
>
> loc_40117A: ; CODE XREF: _main+B0 j
> mov edx, [esp+80h+var_74]
> mov eax, [esp+80h+var_78]
> mov [edx+ebx*4-8], ecx
> jmp loc_4010CE
> ; ---------------------------------------------------------------------------
>
> loc_40118B: ; CODE XREF: _main+10C j
> ; _main+116 j ...
> mov ecx, [esp+80h+var_6C]
> mov edi, [esp+80h+var_5C]
> jmp loc_401087
> _main endp
> ```
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
More information about the llvm-dev
mailing list