<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Modulo to cmov conversion"

   href="https://bugs.llvm.org/show_bug.cgi?id=41255">41255</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Modulo to cmov conversion

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Scalar Optimizations

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>david.bolvansky@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Hi,

int g = 0;

void cnt1(int n) {

    g++;

    g %= n;

}

void cnt2(int n) {

    g++;

    if (g==n) g=0;

}

int main(int argc, char**argv) {

    int e = atoi(argv[1]);

    for (int i = 0; i < e; ++i) {

        cnt1(7);

    }

    printf("%d", g);

}

CNT1 version on Haswell with -O3:

time ./a.out 65384546

3

real    0m0.294s

user    0m0.294s

sys     0m0.000s

CNT1 with GCC-9 -O3 is much faster. -O3 -fno-unroll-loops for Clang doesn't fix

it:

time ./a.out 65384546

3

real    0m0.247s

user    0m0.247s

sys     0m0.000s

GCC:

.L9:

        lea     eax, [rsi+1]

        add     ecx, 1

        movsx   rsi, eax

        cdq

        imul    rsi, rsi, -1840700269

        shr     rsi, 32

        add     esi, eax

        sar     esi, 2

        sub     esi, edx

        lea     edx, [0+rsi*8]

        sub     edx, esi

        sub     eax, edx

        mov     esi, eax

        cmp     ecx, edi

        jne     .L9

        mov     DWORD PTR g[rip], eax

Clang:

.LBB2_1:                           

        lea     ecx, [rsi + 1]

        movsxd  rcx, ecx

        imul    rcx, rcx, -1840700269

        shr     rcx, 32

        lea     ecx, [rcx + rsi]

        add     ecx, 1

        mov     edx, ecx

        shr     edx, 31

        sar     ecx, 2

        add     ecx, edx

        lea     edx, [8*rcx]

        sub     ecx, edx

        lea     esi, [rsi + rcx]

        add     esi, 1

        add     eax, -1

        jne     .LBB2_1

        mov     dword ptr [rip + g], esi

There are only small differences in codegen; I don't see a reason why one

version is slower or faster than the other.

CNT2 version is the best (unrolling and generated cmov in Clang helps a lot,

GCC is slower since it does not use cmov):

time ./a.out 65384546

3

real    0m0.031s

user    0m0.031s

sys     0m0.000s

Maybe it is worth doing this transformation?</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>