[LLVMbugs] [Bug 5129] New: Trigonometric benchmark

Sat Oct 3 15:02:36 PDT 2009

http://llvm.org/bugs/show_bug.cgi?id=5129

           Summary: Trigonometric benchmark
           Product: new-bugs
           Version: 2.5
          Platform: PC
        OS/Version: Windows XP
            Status: NEW
          Keywords: code-quality
          Severity: normal
          Priority: P2
         Component: new bugs
        AssignedTo: unassignedbugs at nondot.org
        ReportedBy: bearophile at mailas.com
                CC: llvmbugs at cs.uiuc.edu

This is a simple trigonometric benchmark in C that shows some ways LLVM may
improve its generated code:

#include "math.h"
#include "stdio.h"

/*
 * Benchmark kernel: evaluates sin, cos, tan and log10 at i = 0, 1, 2, ...
 * for as long as i < nloops, then prints the final counter value and the
 * results computed in the last iteration.
 *
 * nloops: exclusive upper bound for the floating-point counter i. If the
 *         loop condition is already false on entry, the four printed
 *         results are their initial 0.0 values.
 */
void test(double nloops) {
    double rsin = 0.0;
    double rcos = 0.0;
    double rtan = 0.0;
    double rlog = 0.0;

    // The loop counter is a double and is passed directly to the math calls.
    double i = 0.0;
    while (i < nloops) {
        // Each result is overwritten on every pass; only the values from the
        // final pass reach the printf calls below.
        rsin = sin(i);
        rcos = cos(i);
        rtan = tan(i);
        // Disabled alternative to the tan() call; presumably the variant
        // measured under "division instead of tan" later in this report.
        //rtan = rsin / rcos;
        // Note: the first pass evaluates log10(0.0).
        rlog = log10(i);
        i++;
    }

    // Print the final counter and the last computed results.
    printf(" i: %f\n", i);
    printf(" sin: %f\n", rsin);
    printf(" cos: %f\n", rcos);
    printf(" tan: %f\n", rtan);
    printf(" log10: %f\n", rlog);
}

/* Runs the benchmark with 20*1000*1000 (twenty million) as the loop bound. */
int main() {
    // The product is evaluated in int arithmetic and then converted to double.
    double n = 20*1000*1000;
    test(n);
    return 0;
}

Main loop of the C code compiled with LLVM-GCC 2.5 on Windows:

llvm-gcc -Wall -O3 -s -fomit-frame-pointer -msse3 -march=native -ffast-math
bench.c -o bench_llvm

...>elaps bench_llvm.exe
 i: 20000000.000000
 sin: -0.956130
 cos: -0.292941
 tan: 3.263897
 log10: 7.301030
0.00user 0.03system 0:15.18elapsed 0%CPU (0avgtext+0avgdata 8768maxresident)k
0inputs+0outputs (625major+0minor)pagefaults 0swaps

Note the calls to _log10, _tan, _cos and _sin, and the amount of code after
the call to _sin:

LBB1_2: # bb
    movsd   72(%esp), %xmm0
    movsd   %xmm0, (%esp)
    call    _log10
    fstpt   56(%esp)
    movsd   72(%esp), %xmm0
    movsd   %xmm0, (%esp)
    call    _tan
    fstpt   40(%esp)
    movsd   72(%esp), %xmm0
    movsd   %xmm0, (%esp)
    call    _cos
    fstpt   24(%esp)
    movsd   72(%esp), %xmm0
    movsd   %xmm0, (%esp)
    call    _sin
    fstpl   80(%esp)
    fldt    24(%esp)
    fstpl   88(%esp)
    fldt    40(%esp)
    fstpl   96(%esp)
    fldt    56(%esp)
    fstpl   104(%esp)
    movsd   72(%esp), %xmm0
    addsd   LCPI1_0, %xmm0
    movsd   %xmm0, 72(%esp)
    ucomisd 120(%esp), %xmm0
    movsd   104(%esp), %xmm0
    movsd   %xmm0, 56(%esp)
    movsd   96(%esp), %xmm0
    movsd   %xmm0, 40(%esp)
    movsd   88(%esp), %xmm0
    movsd   %xmm0, 24(%esp)
    movsd   80(%esp), %xmm0
    movsd   %xmm0, 16(%esp)
    ##FP_REG_KILL
    jb  LBB1_2  # bb

---------------------------

Main loop of the C code compiled with GCC:
gcc version 4.3.3-dw2-tdm-1 (GCC)

gcc -Wall -O3 -s -fomit-frame-pointer -msse3 -march=native -ffast-math bench.c
-o bench_gcc

GCC:
...>elaps bench_gcc.exe
 i: 20000000.000000
 sin: -0.956130
 cos: -0.292941
 tan: 3.263897
 log10: 7.301030
0.01user 0.00system 0:04.09elapsed 0%CPU (0avgtext+0avgdata 8800maxresident)k
0inputs+0outputs (628major+0minor)pagefaults 0swaps

Note the use of fsincos, fptan, fldlg2, fyl2x and the tight code after fyl2x:

L10:
    fstp    %st(3)
    fstp    %st(0)
L6:
    fld %st(0)
    fsincos
    fld %st(2)
    fptan
    fxch    %st(1)
    fstpl   48(%esp)
    fldlg2
    fld %st(4)
    fyl2x
    fstpl   56(%esp)
    faddp   %st, %st(3)
    fxch    %st(3)
    fcomi   %st(2), %st
    ja  L10

---------------------------

LLVM-GCC on Windows without SSE:

llvm-gcc -Wall -O3 -s -fomit-frame-pointer -S -ffast-math bench.c -o
bench_llvm.s

...>elaps bench_llvm
 i: 20000000.000000
 sin: -0.956130
 cos: -0.292941
 tan: 3.263897
 log10: 7.301030
0.00user 0.01system 0:10.62elapsed 0%CPU (0avgtext+0avgdata 8672maxresident)k
0inputs+0outputs (560major+0minor)pagefaults 0swaps

It's faster than the SSE build above (10.62 s vs. 15.18 s elapsed).
Note the lack of calls to _sin and _cos (fsin and fcos are used instead),
the remaining calls to _log10 and _tan, and the short code after fsin.

LBB1_2: # bb
    fldl    52(%esp)
    fstpl   (%esp)
    call    _log10
    fstpl   36(%esp)
    fldl    52(%esp)
    fstpl   (%esp)
    call    _tan
    fstpl   28(%esp)
    fld1
    fldl    52(%esp)
    faddp   %st(1)
    fstl    44(%esp)
    fldl    64(%esp)
    fxch    %st(1)
    fucomi  %st(1), %st(0)
    fstp    %st(1)
    fldl    52(%esp)
    fcos
    fstpl   20(%esp)
    fldl    52(%esp)
    fsin
    fstpl   12(%esp)
    fstpl   52(%esp)
    ##FP_REG_KILL
    jb  LBB1_2  # bb

---------------------------

Main loop C GCC with division instead of tan:

gcc -Wall -O3 -s -fomit-frame-pointer -msse3 -march=native -ffast-math bench.c
-o bench_gcc

...>elaps bench_gcc
 i: 20000000.000000
 sin: -0.956130
 cos: -0.292941
 tan: 3.263897
 log10: 7.301030
0.00user 0.00system 0:02.61elapsed 0%CPU (0avgtext+0avgdata 8544maxresident)k
0inputs+0outputs (553major+0minor)pagefaults 0swaps

This is how (with -ffast-math) the code may be compiled. fptan is replaced by
an (unsafe) division.

L10:
    fstp    %st(2)
    fstp    %st(0)
L6:
    fld %st(0)
    fsincos
    fld %st(1)
    fdiv    %st(1), %st
    fstpl   48(%esp)
    fldlg2
    fld %st(3)
    fyl2x
    fstpl   56(%esp)
    fxch    %st(2)
    fadds   LC2
    fcomi   %st(3), %st
    jb  L10

---------------------------

-- 
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.