<!doctype html>
<html lang="en">
    <head>
      <!-- NOTE(review): the visible content is plain ASCII, so utf-8 is a safe
           declaration; confirm it matches the charset of the enclosing mail part. -->
      <meta charset="utf-8">
      <title>Bug 36696 - X86 reassociate generates add chain instead of tree for ILP</title>
      <!-- Relative URLs in this message resolve against the LLVM Bugzilla host. -->
      <base href="https://bugs.llvm.org/">
    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - X86 reassociate generates add chain instead of tree for ILP"

   href="https://bugs.llvm.org/show_bug.cgi?id=36696">36696</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>X86 reassociate generates add chain instead of tree for ILP

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>tools

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>All

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>llc

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>anna@azul.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Given an IR like the one below:

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"

target triple = "x86_64-unknown-linux-gnu"

define i32 @test(i8 addrspace(1)* %tmp, i32 %init) {

entry:

  br label %loop

loop:

  %sum = phi i32 [0, %entry], [%csum, %loop]

  %iv = phi i32 [%init, %entry], [ %ivnext, %loop]

  %c1 = getelementptr inbounds i8, i8 addrspace(1)* %tmp, i64 8

  %addr = bitcast i8 addrspace(1)* %c1 to i32 addrspace(1)*

  %c2 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c3 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c4 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c5 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c6 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c7 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c8 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c9 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c14 = add i32 %c3, %c2 

  %c13 = add i32 %c14, %c4 

  %c15 = add i32 %c13, %c5 

  %c11 = add i32 %c15, %c6 

  %c10 = add i32 %c11, %c7 

  %c12 = add i32 %c10, %c8 

  %c16 = add i32 %c12, %c9 

  %csum = add i32 %sum, %c16

  %ivnext = add nsw nuw i32 %iv, 1

  %cond = icmp ult i32 %ivnext, 100 

  br i1 %cond, label %loop, label %exit

exit:

  ret i32 %sum

}

!0 = !{!"-mcpu=haswell"}

!1 = !{!"-mattr=+sse2,+cx16,+sahf,+avx"}

The LLC machine combiner which is based on registers and critical path is not

able to convert this into add-tree form (instead of the add-chain form).

@Sanjay, could we extend <a href="https://reviews.llvm.org/D10321">https://reviews.llvm.org/D10321</a> to handle this case? I

tried reducing the adds to just 3 to see if it's some sort of register

pressure that's preventing ILP identification. Also, tried the same IR in

straight line code, but no luck. There's AVX support and this IR is run on

a Haswell machine.

The assembly generated with LLC -O3 is:

  movl  %ecx, %eax

  movl  8(%rdi), %ecx

  #MEMBARRIER

  movl  8(%rdi), %edx

  #MEMBARRIER

  movl  8(%rdi), %r8d

  #MEMBARRIER

  movl  8(%rdi), %r9d

  #MEMBARRIER

  movl  8(%rdi), %r10d

  #MEMBARRIER

  movl  8(%rdi), %r11d

  #MEMBARRIER

  movl  8(%rdi), %ebx

  #MEMBARRIER

  movl  8(%rdi), %ebp

  #MEMBARRIER

  addl  %eax, %ecx &lt;-- we do add-chaining here.

  addl  %edx, %ecx

  addl  %r8d, %ecx

  addl  %r9d, %ecx

  addl  %r10d, %ecx

  addl  %r11d, %ecx

  addl  %ebx, %ecx

  addl  %ebp, %ecx

  incl  %esi

We do add chaining instead of something like this:

  movl  %ecx, %eax

  movl  8(%rdi), %r8d

  #MEMBARRIER

  movl  8(%rdi), %r10d

  #MEMBARRIER

  movl  8(%rdi), %r9d

  #MEMBARRIER

  movl  8(%rdi), %edx

  #MEMBARRIER

  movl  8(%rdi), %r11d

  #MEMBARRIER

  movl  8(%rdi), %ebx

  #MEMBARRIER

  movl  8(%rdi), %ebp

  #MEMBARRIER

  movl  8(%rdi), %ecx

  #MEMBARRIER

  addl  %r8d, %r10d  &lt;-- add-tree form instead of chaining.

  addl  %r9d, %edx

  addl  %r10d, %edx

  addl  %r11d, %ebx

  addl  %ebp, %ecx

  addl  %ebx, %ecx

  addl  %edx, %ecx

  addl  %eax, %ecx

  incl  %esi

I generated the above assembly based on add-tree IR:

define i32 @test(i8 addrspace(1)* %tmp, i32 %init) {

entry:

  br label %loop

loop:

  %sum = phi i32 [0, %entry], [%csum, %loop]

  %iv = phi i32 [%init, %entry], [ %ivnext, %loop]

  %c1 = getelementptr inbounds i8, i8 addrspace(1)* %tmp, i64 8

  %addr = bitcast i8 addrspace(1)* %c1 to i32 addrspace(1)*

  %c2 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c3 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c4 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c5 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c6 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c7 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c8 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c9 = load i32, i32 addrspace(1)* %addr, align 8

  fence acquire

  %c10 = add i32 %c2, %c3 

  %c11 = add i32 %c4, %c5 

  %c12 = add i32 %c10, %c11

  %c13 = add i32 %c6, %c7 

  %c14 = add i32 %c8, %c9 

  %c15 = add i32 %c13, %c14

  %c16 = add i32 %c12, %c15

  %csum = add i32 %sum, %c16

  %ivnext = add nsw nuw i32 %iv, 1

  %cond = icmp ult i32 %ivnext, 100 

  br i1 %cond, label %loop, label %exit

exit:

  ret i32 %sum

}</pre>

        </div>

      </p>

      <hr>

      <!-- Notification footer: lists the reason(s) the recipient was sent this bug mail. -->
      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>