[LLVMbugs] [Bug 23163] New: gep(gep...) merging in instcombine optimization hurts performance sometimes
bugzilla-daemon at llvm.org
bugzilla-daemon at llvm.org
Wed Apr 8 13:40:16 PDT 2015
https://llvm.org/bugs/show_bug.cgi?id=23163
Bug ID: 23163
Summary: gep(gep...) merging in instcombine optimization hurts
performance sometimes
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: normal
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: wmi at google.com
CC: llvmbugs at cs.uiuc.edu
Classification: Unclassified
Created attachment 14170
--> https://llvm.org/bugs/attachment.cgi?id=14170&action=edit
1.cc
Compile the attached testcase 1.cc with:
~/workarea/llvm-r234388/build/bin/clang -O2 -fno-omit-frame-pointer -std=c++11
1.cc -S -o 1.s
In 1.s, the kernel loop contains 29 insns:
.LBB1_6: # %for.body7
# Parent Loop BB1_3 Depth=1
# => This Inner Loop Header: Depth=2
leaq (%r12,%rdi), %rsi
movslq %esi, %rbx
leaq (%rbx,%r10), %rsi
movzbl (%r9,%rsi), %esi
leal (%rsi,%rsi,2), %esi
leal (%r13,%rdi), %r8d
movslq %r8d, %rcx
addq %r10, %rcx
movzbl (%r9,%rcx), %ecx
leal 1(%rbx), %eax
cltq
addq %r10, %rax
movzbl (%r9,%rax), %eax
addl %ecx, %eax
leal (%r14,%rdi), %ecx
movslq %ecx, %rcx
addq %r10, %rcx
movzbl (%r9,%rcx), %ecx
addl $2, %ebx
movslq %ebx, %rbx
addq %r10, %rbx
movzbl (%r9,%rbx), %ebx
leal (%rcx,%rsi,2), %ecx
leal (%rcx,%rax,4), %eax
addl %ebx, %eax
movl %eax, (%r11,%rdi,2)
addq $2, %rdi
decl %edx
jne .LBB1_6
If we disable gep merging in the InstCombine pass, the kernel loop contains
fewer insns (25 insns):
.LBB1_6: # %for.body7
# Parent Loop BB1_3 Depth=1
# => This Inner Loop Header: Depth=2
leaq (%r12,%rcx), %rsi
movslq %esi, %rsi
movzbl (%r9,%rsi), %edx
leal (%rdx,%rdx,2), %edx
leal (%r13,%rcx), %ebx
movslq %ebx, %rbx
movzbl (%r9,%rbx), %ebx
leal 1(%rsi), %r8d
movslq %r8d, %rax
movzbl (%r9,%rax), %eax
addl %ebx, %eax
leal (%r14,%rcx), %ebx
movslq %ebx, %rbx
movzbl (%r9,%rbx), %ebx
addl $2, %esi
movslq %esi, %rsi
movzbl (%r9,%rsi), %esi
leal (%rbx,%rdx,2), %edx
leal (%rdx,%rax,4), %eax
addl %esi, %eax
movl %eax, (%r11,%rcx,2)
addq $2, %rcx
incl %edi
cmpl %edi, %r15d
jne .LBB1_6
The gep(gep ...) merging optimization is similar to the forward propagation
optimization. We need to be careful with it, especially when the source gep has
more than one use. Usually we perform such an optimization only when the merge
will not increase the cost of the destination. We can see why gep merging is
bad here from the IR of 1.cc below:
* The IR before InstCombine:
...
%2 = load i8*, i8** %data, align 8, !tbaa !7
%idx.ext2 = sext i32 %call1 to i64
%add.ptr3 = getelementptr inbounds i8, i8* %2, i64 %idx.ext2
...
for.body7:
...
%arrayidx = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom
%3 = load i8, i8* %arrayidx, align 1, !tbaa !9
...
%arrayidx11 = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom10
%4 = load i8, i8* %arrayidx11, align 1, !tbaa !9
...
%arrayidx15 = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom14
%5 = load i8, i8* %arrayidx15, align 1, !tbaa !9
...
%arrayidx23 = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom22
%6 = load i8, i8* %arrayidx23, align 1, !tbaa !9
...
br label %for.cond5
* The IR after InstCombine with gep merge:
...
%2 = load i8*, i8** %data, align 8, !tbaa !7
%idx.ext2 = sext i32 %call1 to i64
...
for.body7:
...
%add.ptr3.sum = add nsw i64 %idx.ext2, %idxprom
%arrayidx = getelementptr inbounds i8, i8* %2, i64 %add.ptr3.sum
%3 = load i8, i8* %arrayidx, align 1, !tbaa !9
...
%add.ptr3.sum61 = add nsw i64 %idx.ext2, %idxprom10
%arrayidx11 = getelementptr inbounds i8, i8* %2, i64 %add.ptr3.sum61
%4 = load i8, i8* %arrayidx11, align 1, !tbaa !9
...
%add.ptr3.sum63 = add nsw i64 %idx.ext2, %idxprom14
%arrayidx15 = getelementptr inbounds i8, i8* %2, i64 %add.ptr3.sum63
%5 = load i8, i8* %arrayidx15, align 1, !tbaa !9
...
%add.ptr3.sum64 = add nsw i64 %idx.ext2, %idxprom22
%arrayidx23 = getelementptr inbounds i8, i8* %2, i64 %add.ptr3.sum64
%6 = load i8, i8* %arrayidx23, align 1, !tbaa !9
...
br label %for.cond5
We can see that after gep merging there is one fewer gep outside the loop, but
there are four more add insns inside the loop (one for each use of %add.ptr3).
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20150408/c5fdc63f/attachment.html>
More information about the llvm-bugs
mailing list