[llvm-bugs] [Bug 32214] New: Non-optimal loop rotation due to BPI
via llvm-bugs
llvm-bugs at lists.llvm.org
Fri Mar 10 05:15:23 PST 2017
https://bugs.llvm.org/show_bug.cgi?id=32214
Bug ID: 32214
Summary: Non-optimal loop rotation due to BPI
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: Common Code Generator Code
Assignee: unassignedbugs at nondot.org
Reporter: apilipenko at azulsystems.com
CC: llvm-bugs at lists.llvm.org
This is a bug report from Serguei Katkov (serguei.katkov at azul.com)
Block-placement pass does non-optimal loop rotation due to Branch Probability
Analysis ignores prof metadata for unreachable branches.
Let's consider the following function:
================================================
void throw_ex(int i) __attribute__((noreturn));
void progress(int);
int test(int count) {
int sum = 2 * count;
for(int i = 0; i < count; i++) {
if (i > 9000000)
throw_ex(i);
if (i % 1024 == 0)
progress(sum);
sum += i;
}
return sum;
}
================================================
If we compile it and gather the branch profile with invocation of test(8000001)
we get the following data:
i < count has probability (8000000, 1)
i > 9000000 has probability (0, 8000000)
i % 1024 == 0 has probability (1023, 1)
Let's add this information to ll file and get
================================================
; ModuleID = 'test.cpp'
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-redhat-linux-gnu"
; Function Attrs: uwtable
define i32 @_Z4testi(i32 %count) #0 {
%1 = shl nsw i32 %count, 1
%2 = icmp sgt i32 %count, 0
br i1 %2, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0, %9
%i.04 = phi i32 [ %11, %9 ], [ 0, %0 ]
%sum.03 = phi i32 [ %10, %9 ], [ %1, %0 ]
%3 = icmp sgt i32 %i.04, 9000000
br i1 %3, label %4, label %5, !prof !3
; <label>:4 ; preds = %.lr.ph
tail call void @_Z8throw_exi(i32 %i.04) #3
unreachable
; <label>:5 ; preds = %.lr.ph
%6 = and i32 %i.04, 1023
%7 = icmp eq i32 %6, 0
br i1 %7, label %8, label %9, !prof !4
; <label>:8 ; preds = %5
tail call void @_Z8progressi(i32 %sum.03)
br label %9
; <label>:9 ; preds = %8, %5
%10 = add nsw i32 %i.04, %sum.03
%11 = add nsw i32 %i.04, 1
%12 = icmp slt i32 %11, %count
br i1 %12, label %.lr.ph, label %._crit_edge, !prof !5
._crit_edge: ; preds = %9, %0
%sum.0.lcssa = phi i32 [ %1, %0 ], [ %10, %9 ]
ret i32 %sum.0.lcssa
}
; Function Attrs: noreturn
declare void @_Z8throw_exi(i32) #1
declare void @_Z8progressi(i32) #2
attributes #0 = { uwtable "less-precise-fpmad"="false"
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noreturn "less-precise-fpmad"="false"
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #3 = { noreturn }
!3 = !{!"branch_weights", i32 0, i32 8000000}
!4 = !{!"branch_weights", i32 1, i32 1023}
!5 = !{!"branch_weights", i32 8000000, i32 1}
================================================
After generation of asm we'll get
================================================
…
# BB#1: # %.lr.ph.preheader
xorl %ebp, %ebp
jmp .LBB0_2
.p2align 4, 0x90
.LBB0_3: # in Loop: Header=BB0_2 Depth=1
testw $1023, %bp # imm = 0x3FF
je .LBB0_4
.LBB0_5: # in Loop: Header=BB0_2 Depth=1
addl %ebp, %ebx
incl %ebp
cmpl %r14d, %ebp
jl .LBB0_2
jmp .LBB0_6
.LBB0_4: # in Loop: Header=BB0_2 Depth=1
movl %ebx, %edi
callq _Z8progressi
jmp .LBB0_5
.p2align 4, 0x90
.LBB0_2: # %.lr.ph
# =>This Inner Loop Header: Depth=1
cmpl $9000001, %ebp # imm = 0x895441
jl .LBB0_3
# BB#7:
movl %ebp, %edi
callq _Z8throw_exi
.LBB0_6: # %._crit_edge
movl %ebx, %eax
popq %rbx
popq %r14
popq %rbp
retq
…
================================================
Using the pseudo-language it looks like
================================================
PRE_HEADER:
i = 0
jump HEADER
MIDDLE_BLOCK:
i%1024 == 0 then jump COLD_BLOCK
BACK_BRANCH:
sum += i
i++
i < count then jump HEADER
jump EXIT
COLD_BLOCK:
call progress(i)
jump BACK_BRANCH
HEADER:
i < 9000001 then jump MIDDLE_BLOCK
UNREACHABLE:
call throw_ex(i)
EXIT:
return sum
================================================
It happened due to block-placement pass did loop rotation thinking that the
hottest exit from the loop comes from HEADER (!!!). It is because the branch
probability
of HEADER provided by metadata
!3 = !{!"branch_weights", i32 0, i32 8000000}
has been ignored and instead the hardcoded probability (1, 1048576) has been
used.
As a result this branch appeared be hotter then Exit from the BACK_BRANCH and
it was placed
at the end of loop.
In reality the right order of basic blocks should be
PRE_HEADER
COLD_BLOCK
HEADER
MIDDLE_BLOCK
BACK_BRANCH
EXIT
UNREACHABLE
The patch https://reviews.llvm.org/D30631 fixes this.
Please note that by some reason the metadata !3 is absent but !4 and !5 are
present
we need also https://reviews.llvm.org/D30633 to produce the correct code.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170310/632ce4d4/attachment.html>
More information about the llvm-bugs
mailing list