<html>

    <head>

      <base href="http://llvm.org/bugs/" />

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - [licm] LICM promote speculative load to scalar"

   href="http://llvm.org/bugs/show_bug.cgi?id=21229">21229</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>[licm] LICM promote speculative load to scalar

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>normal

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Scalar Optimizations

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>bmakam@codeaurora.org

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvmbugs@cs.uiuc.edu

          </td>

        </tr>

        <tr>

          <th>Classification</th>

          <td>Unclassified

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Here is a simple c code:

LLVM takes a conservative approach and does not promote speculative loads to

scalars in the LICM pass because it would break the LLVM concurrency model. While

this is true because it could introduce a race with multiple threads, there

are still some cases where it is OK to promote speculative loads without

causing a race condition, such as when the loop entry is guarded by a condition

and we can prove that there is at least one load.

Consider this example:

extern int globalvar;

void foo(int n , int incr) {

  unsigned int i;

  for (i = 0 ; i &lt; n; i += incr ) {

    if (i &lt; n/2)

      globalvar += incr;

  }

return;

}

GCC produces the following output:

GCC output:

$ aarch64-linux-gnu-g++ -S -o -  -O3  -ffast-math -march=armv8-a+simd test.cpp

        .arch armv8-a+fp+simd

        .file   "test.cpp"

        .text

        .align  2

        .global _Z3fooii

        .type   _Z3fooii, %function

_Z3fooii:

.LFB0:

        .cfi_startproc

        cbz     w0, .L1

        adrp    x6, globalvar

        add     w5, w0, w0, lsr 31

        ldr     w3, [x6,#:lo12:globalvar]    &lt;== hoist load of globalvar

        mov     w2, 0

        asr     w5, w5, 1

.L4:

        cmp     w5, w2

        add     w2, w2, w1

        add     w4, w3, w1

        csel    w3, w4, w3, hi

        cmp     w2, w0

        bcc     .L4

        str     w3, [x6,#:lo12:globalvar]    &lt;== sink store of globalvar

.L1:

        ret

        .cfi_endproc

.LFE0:

        .size   _Z3fooii, .-_Z3fooii

        .ident  "GCC: (crosstool-NG linaro-1.13.1-4.8-2014.01 - Linaro GCC

2013.11) 4.9.0"

whereas LLVM produces the following output:

$ clang-aarch64-x++ -S -o - -O3 -ffast-math -fslp-vectorize test.cpp

        .text

        .file   "test.cpp"

        .globl  _Z3fooii

        .align  2

        .type   _Z3fooii,@function

_Z3fooii:                               // @_Z3fooii

// BB#0:                                // %entry

        cbz     w0, .LBB0_5

// BB#1:                                // %for.body.lr.ph

        mov      w8, wzr

        cmp      w0, #0                 // =0

        cinc     w9, w0, lt

        asr     w9, w9, #1

        adrp    x10, globalvar

.LBB0_2:                                // %for.body

                                        // =>This Inner Loop Header: Depth=1

        cmp      w8, w9

        b.hs    .LBB0_4

// BB#3:                                // %if.then

                                        //   in Loop: Header=BB0_2 Depth=1

        ldr     w11, [x10, :lo12:globalvar]          &lt;===== load inside loop

        add      w11, w11, w1

        str     w11, [x10, :lo12:globalvar]          &lt;==== store inside loop

.LBB0_4:                                // %for.inc

                                        //   in Loop: Header=BB0_2 Depth=1

        add      w8, w8, w1

        cmp      w8, w0

        b.lo    .LBB0_2

.LBB0_5:                                // %for.end

        ret

.Ltmp1:

        .size   _Z3fooii, .Ltmp1-_Z3fooii

        .ident  "clang version 3.6.0 "

LLVM misses this opportunity by being too conservative.

This was discussed briefly in the llvm-dev mailing list here:

<a href="http://article.gmane.org/gmane.comp.compilers.llvm.devel/76467">http://article.gmane.org/gmane.comp.compilers.llvm.devel/76467</a></pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>