[llvm-bugs] [Bug 32214] New: Non-optimal loop rotation due to BPI

via llvm-bugs llvm-bugs at lists.llvm.org
Fri Mar 10 05:15:23 PST 2017


https://bugs.llvm.org/show_bug.cgi?id=32214

            Bug ID: 32214
           Summary: Non-optimal loop rotation due to BPI
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Common Code Generator Code
          Assignee: unassignedbugs at nondot.org
          Reporter: apilipenko at azulsystems.com
                CC: llvm-bugs at lists.llvm.org

This is a bug report from Serguei Katkov (serguei.katkov at azul.com)

Block-placement pass does non-optimal loop rotation due to Branch Probability
Analysis ignores prof metadata for unreachable branches.

Let's consider the following function:
================================================
void throw_ex(int i) __attribute__((noreturn));
void progress(int);

int test(int count) {
  int sum = 2 * count;
  for(int i = 0; i < count; i++) {
    if (i > 9000000)
      throw_ex(i);
    if (i % 1024 == 0)
      progress(sum);
    sum += i;
  }
  return sum;
}
================================================
If we compile it and gather the branch profile with invocation of test(8000001)
we get the following data:
i < count has probability (8000000, 1)
i > 9000000 has probability (0, 8000000)
i % 1024 == 0 has probability (1023, 1)

Let's add this information to ll file and get
================================================
; ModuleID = 'test.cpp'
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-redhat-linux-gnu"

; Function Attrs: uwtable
define i32 @_Z4testi(i32 %count) #0 {
  %1 = shl nsw i32 %count, 1
  %2 = icmp sgt i32 %count, 0
  br i1 %2, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %9
  %i.04 = phi i32 [ %11, %9 ], [ 0, %0 ]
  %sum.03 = phi i32 [ %10, %9 ], [ %1, %0 ]
  %3 = icmp sgt i32 %i.04, 9000000
  br i1 %3, label %4, label %5, !prof !3

; <label>:4                                       ; preds = %.lr.ph
  tail call void @_Z8throw_exi(i32 %i.04) #3
  unreachable

; <label>:5                                       ; preds = %.lr.ph
  %6 = and i32 %i.04, 1023
  %7 = icmp eq i32 %6, 0
  br i1 %7, label %8, label %9, !prof !4

; <label>:8                                       ; preds = %5
  tail call void @_Z8progressi(i32 %sum.03)
  br label %9

; <label>:9                                       ; preds = %8, %5
  %10 = add nsw i32 %i.04, %sum.03
  %11 = add nsw i32 %i.04, 1
  %12 = icmp slt i32 %11, %count
  br i1 %12, label %.lr.ph, label %._crit_edge, !prof !5

._crit_edge:                                      ; preds = %9, %0
  %sum.0.lcssa = phi i32 [ %1, %0 ], [ %10, %9 ]
  ret i32 %sum.0.lcssa
}

; Function Attrs: noreturn
declare void @_Z8throw_exi(i32) #1

declare void @_Z8progressi(i32) #2

attributes #0 = { uwtable "less-precise-fpmad"="false"
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noreturn "less-precise-fpmad"="false"
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #3 = { noreturn }

!3 = !{!"branch_weights", i32 0, i32 8000000}
!4 = !{!"branch_weights", i32 1, i32 1023}
!5 = !{!"branch_weights", i32 8000000, i32 1}
================================================
After generation of asm we'll get
================================================
…
# BB#1:                                 # %.lr.ph.preheader
     xorl %ebp, %ebp
     jmp  .LBB0_2
     .p2align   4, 0x90
.LBB0_3:                                #   in Loop: Header=BB0_2 Depth=1
     testw $1023, %bp              # imm = 0x3FF
     je   .LBB0_4
.LBB0_5:                                #   in Loop: Header=BB0_2 Depth=1
     addl %ebp, %ebx
     incl %ebp
     cmpl %r14d, %ebp
     jl   .LBB0_2
     jmp  .LBB0_6
.LBB0_4:                                #   in Loop: Header=BB0_2 Depth=1
     movl %ebx, %edi
     callq _Z8progressi
     jmp  .LBB0_5
     .p2align   4, 0x90
.LBB0_2:                                # %.lr.ph
                                        # =>This Inner Loop Header: Depth=1
     cmpl $9000001, %ebp          # imm = 0x895441
     jl   .LBB0_3
# BB#7:
     movl %ebp, %edi
     callq _Z8throw_exi
.LBB0_6:                                # %._crit_edge
     movl %ebx, %eax
     popq %rbx
     popq %r14
     popq %rbp
     retq
…
================================================
Using the pseudo-language it looks like
================================================
PRE_HEADER:
i = 0
jump HEADER

MIDDLE_BLOCK:
i%1024 == 0 then jump COLD_BLOCK

BACK_BRANCH:
sum += i
i++
i < count then jump HEADER
jump EXIT

COLD_BLOCK:
call progress(i)
jump BACK_BRANCH

HEADER:
i < 9000001 then jump MIDDLE_BLOCK

UNREACHABLE:
call throw_ex(i)

EXIT:
return sum
================================================
It happened due to block-placement pass did loop rotation thinking that the
hottest exit from the loop comes from HEADER (!!!). It is because the branch
probability
of HEADER provided by metadata
!3 = !{!"branch_weights", i32 0, i32 8000000}
has been ignored and instead the hardcoded probability (1, 1048576) has been
used.
As a result this branch appeared be hotter then Exit from the BACK_BRANCH and
it was placed
at the end of loop.

In reality the right order of basic blocks should be
PRE_HEADER
COLD_BLOCK
HEADER
MIDDLE_BLOCK
BACK_BRANCH
EXIT
UNREACHABLE

The patch https://reviews.llvm.org/D30631 fixes this.
Please note that by some reason the metadata !3 is absent but !4 and !5 are
present
we need also https://reviews.llvm.org/D30633 to produce the correct code.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170310/632ce4d4/attachment.html>


More information about the llvm-bugs mailing list