<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Loop optimization makes code bloat when moving conditionally called pure functions outside of loop"

   href="https://bugs.llvm.org/show_bug.cgi?id=38554">38554</a>

          </td>

        </tr>


        <tr>

          <th>Summary</th>

          <td>Loop optimization makes code bloat when moving conditionally called pure functions outside of loop

          </td>

        </tr>


        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>


        <tr>

          <th>Version</th>

          <td>6.0

          </td>

        </tr>


        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>


        <tr>

          <th>OS</th>

          <td>All

          </td>

        </tr>


        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>


        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>


        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>


        <tr>

          <th>Component</th>

          <td>Loop Optimizer

          </td>

        </tr>


        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>


        <tr>

          <th>Reporter</th>

          <td>o_kniemeyer@maxon.de

          </td>

        </tr>


        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>If you compile this code snippet at <a href="https://gcc.godbolt.org">https://gcc.godbolt.org</a> with -O3


int Func(const void*) __attribute__ ((pure));


int Test(int n, const void* ptr2)

{

    const void* ptr = ptr2;

    int result = 0;

    for (int i = 0; i &lt; n; ++i)

    {

#if 1

        result += Func(ptr);

#else

        result += ptr ? Func(ptr) : 0;

#endif

    }

    return result;

}


you get the expected result:


Test(int, void const*): # @Test(int, void const*)

  pushq %rbx

  movl %edi, %ebx

  testl %ebx, %ebx

  jle .LBB0_1

  movq %rsi, %rdi

  callq Func(void const*)

  imull %ebx, %eax

  popq %rbx

  retq

.LBB0_1:

  xorl %eax, %eax

  popq %rbx

  retq


So the optimizer has moved the call to Func outside of the loop, and there's

just a multiplication with the loop count. If you use the code with nullptr

check (#if 0) the call is also moved to the outside, but the multiplication is

replaced by code bloat:


Test(int, void const*): # @Test(int, void const*)

  pushq %rbx

  movl %edi, %ebx

  xorl %ecx, %ecx

  testl %ebx, %ebx

  jle .LBB0_13

  testq %rsi, %rsi

  je .LBB0_13

  movq %rsi, %rdi

  callq Func(void const*)

  xorl %edx, %edx

  movl $0, %ecx

  cmpl $7, %ebx

  jbe .LBB0_3

  movl %ebx, %edx

  andl $-8, %edx

  movd %eax, %xmm0

  pshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]

  leal -8(%rdx), %esi

  movl %esi, %edi

  shrl $3, %edi

  leal 1(%rdi), %ecx

  andl $7, %ecx

  cmpl $56, %esi

  jae .LBB0_7

  pxor %xmm1, %xmm1

  pxor %xmm2, %xmm2

  testl %ecx, %ecx

  jne .LBB0_10

  jmp .LBB0_12

.LBB0_7:

  leal -1(%rcx), %esi

  subl %edi, %esi

  pxor %xmm1, %xmm1

  pxor %xmm2, %xmm2

.LBB0_8: # =>This Inner Loop Header: Depth=1

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  addl $8, %esi

  jne .LBB0_8

  testl %ecx, %ecx

  je .LBB0_12

.LBB0_10:

  negl %ecx

.LBB0_11: # =>This Inner Loop Header: Depth=1

  paddd %xmm0, %xmm1

  paddd %xmm0, %xmm2

  addl $1, %ecx

  jne .LBB0_11

.LBB0_12:

  paddd %xmm2, %xmm1

  pshufd $78, %xmm1, %xmm0 # xmm0 = xmm1[2,3,0,1]

  paddd %xmm1, %xmm0

  pshufd $229, %xmm0, %xmm1 # xmm1 = xmm0[1,1,2,3]

  paddd %xmm0, %xmm1

  movd %xmm1, %ecx

  cmpl %ebx, %edx

  je .LBB0_13

.LBB0_3:

  subl %edx, %ebx

.LBB0_4: # =>This Inner Loop Header: Depth=1

  addl %eax, %ecx

  addl $-1, %ebx

  jne .LBB0_4

.LBB0_13:

  movl %ecx, %eax

  popq %rbx

  retq


Adding -fno-vectorize helps a bit, but I doubt that the code is faster than

imull.</pre>

        </div>

      </p>


      <hr>

      <span>You are receiving this mail because:</span>


      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>