[llvm-bugs] [Bug 36696] New: X86 reassociate generates add chain instead of tree for ILP
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Mar 12 13:13:32 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=36696
Bug ID: 36696
Summary: X86 reassociate generates add chain instead of tree
for ILP
Product: tools
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: llc
Assignee: unassignedbugs at nondot.org
Reporter: anna at azul.com
CC: llvm-bugs at lists.llvm.org
Given an IR like the one below:
; Reproducer (add-chain form): a loop that loads the same i32 eight times and
; sums the loaded values with a serial chain of adds.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"
; @test(%tmp, %init): %tmp is a pointer in address space 1, %init is the
; starting value of the loop counter. Returns an i32.
define i32 @test(i8 addrspace(1)* %tmp, i32 %init) {
entry:
br label %loop
loop:
; %sum is the loop-carried total: 0 on entry, %csum on the back edge.
%sum = phi i32 [0, %entry], [%csum, %loop]
; %iv is the loop counter: %init on entry, %ivnext on the back edge.
%iv = phi i32 [%init, %entry], [ %ivnext, %loop]
; %addr = %tmp + 8 bytes, reinterpreted as a pointer to i32.
%c1 = getelementptr inbounds i8, i8 addrspace(1)* %tmp, i64 8
%addr = bitcast i8 addrspace(1)* %c1 to i32 addrspace(1)*
; Eight loads of the same address (%c2..%c9), each followed by an acquire fence.
%c2 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c3 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c4 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c5 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c6 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c7 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c8 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c9 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
; Add chain: every add below takes the previous add's result as an operand,
; so the seven adds form a single serial dependency chain ending in %c16.
%c14 = add i32 %c3, %c2
%c13 = add i32 %c14, %c4
%c15 = add i32 %c13, %c5
%c11 = add i32 %c15, %c6
%c10 = add i32 %c11, %c7
%c12 = add i32 %c10, %c8
%c16 = add i32 %c12, %c9
; Fold this iteration's total into the loop-carried sum.
%csum = add i32 %sum, %c16
; Loop control: increment the counter and repeat while %ivnext <u 100
; (unsigned compare).
%ivnext = add nsw nuw i32 %iv, 1
%cond = icmp ult i32 %ivnext, 100
br i1 %cond, label %loop, label %exit
exit:
; Returns the phi value %sum, not %csum.
ret i32 %sum
}
; Metadata nodes holding CPU and feature option strings. Nothing in the visible
; IR references !0 or !1.
!0 = !{!"-mcpu=haswell"}
!1 = !{!"-mattr=+sse2,+cx16,+sahf,+avx"}
The llc machine combiner, which is based on registers and the critical path, is
not able to convert this add chain into add-tree form.
@Sanjay, could we extend https://reviews.llvm.org/D10321 to handle this case? I
tried reducing the number of adds to just 3 to see whether some sort of register
pressure is preventing the ILP from being identified. I also tried the same IR
as straight-line code, but had no luck. AVX support is enabled, and this IR is
run on a Haswell machine.
The assembly generated with LLC -O3 is:
movl %ecx, %eax
movl 8(%rdi), %ecx
#MEMBARRIER
movl 8(%rdi), %edx
#MEMBARRIER
movl 8(%rdi), %r8d
#MEMBARRIER
movl 8(%rdi), %r9d
#MEMBARRIER
movl 8(%rdi), %r10d
#MEMBARRIER
movl 8(%rdi), %r11d
#MEMBARRIER
movl 8(%rdi), %ebx
#MEMBARRIER
movl 8(%rdi), %ebp
#MEMBARRIER
addl %eax, %ecx <-- we do add-chaining here.
addl %edx, %ecx
addl %r8d, %ecx
addl %r9d, %ecx
addl %r10d, %ecx
addl %r11d, %ecx
addl %ebx, %ecx
addl %ebp, %ecx
incl %esi
We do add chaining instead of something like this:
movl %ecx, %eax
movl 8(%rdi), %r8d
#MEMBARRIER
movl 8(%rdi), %r10d
#MEMBARRIER
movl 8(%rdi), %r9d
#MEMBARRIER
movl 8(%rdi), %edx
#MEMBARRIER
movl 8(%rdi), %r11d
#MEMBARRIER
movl 8(%rdi), %ebx
#MEMBARRIER
movl 8(%rdi), %ebp
#MEMBARRIER
movl 8(%rdi), %ecx
#MEMBARRIER
addl %r8d, %r10d <-- add-tree form instead of chaining.
addl %r9d, %edx
addl %r10d, %edx
addl %r11d, %ebx
addl %ebp, %ecx
addl %ebx, %ecx
addl %edx, %ecx
addl %eax, %ecx
incl %esi
I generated the above assembly from the following add-tree IR:
; Add-tree variant of @test: same loads and loop structure, but the eight
; loaded values are summed pairwise in a balanced tree instead of a chain.
define i32 @test(i8 addrspace(1)* %tmp, i32 %init) {
entry:
br label %loop
loop:
; %sum is the loop-carried total: 0 on entry, %csum on the back edge.
%sum = phi i32 [0, %entry], [%csum, %loop]
; %iv is the loop counter: %init on entry, %ivnext on the back edge.
%iv = phi i32 [%init, %entry], [ %ivnext, %loop]
; %addr = %tmp + 8 bytes, reinterpreted as a pointer to i32.
%c1 = getelementptr inbounds i8, i8 addrspace(1)* %tmp, i64 8
%addr = bitcast i8 addrspace(1)* %c1 to i32 addrspace(1)*
; Eight loads of the same address (%c2..%c9), each followed by an acquire fence.
%c2 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c3 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c4 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c5 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c6 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c7 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c8 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
%c9 = load i32, i32 addrspace(1)* %addr, align 8
fence acquire
; Add tree. Level 1: %c10, %c11, %c13, %c14 each add two loaded values and do
; not depend on one another. Level 2: %c12 and %c15 each combine two level-1
; results. Level 3: %c16 combines %c12 and %c15.
%c10 = add i32 %c2, %c3
%c11 = add i32 %c4, %c5
%c12 = add i32 %c10, %c11
%c13 = add i32 %c6, %c7
%c14 = add i32 %c8, %c9
%c15 = add i32 %c13, %c14
%c16 = add i32 %c12, %c15
; Fold this iteration's total into the loop-carried sum.
%csum = add i32 %sum, %c16
; Loop control: increment the counter and repeat while %ivnext <u 100
; (unsigned compare).
%ivnext = add nsw nuw i32 %iv, 1
%cond = icmp ult i32 %ivnext, 100
br i1 %cond, label %loop, label %exit
exit:
; Returns the phi value %sum, not %csum.
ret i32 %sum
}
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180312/b4cdb96a/attachment.html>
More information about the llvm-bugs
mailing list