[llvm] r239843 - Fix PR 23525 - Separate header mass propagation in irregular loops.

Wed Jun 17 07:11:24 PDT 2015

On Tue, Jun 16, 2015 at 5:00 PM, Duncan P. N. Exon Smith
<dexonsmith at apple.com> wrote:
>
> > On 2015-Jun-16, at 12:10, Diego Novillo <dnovillo at google.com> wrote:
> > @@ -223,6 +226,14 @@ public:
> >     BlockNode getHeader() const { return Nodes[0]; }
> >     bool isIrreducible() const { return NumHeaders > 1; }
> >
> > +    HeaderMassList::difference_type headerIndexFor(const BlockNode &B) {
>
> Should be a verb, something like `getHeaderIndex()`.

Done.

> Can you describe the new behaviour somewhere?  I think it belongs somewhere
> inside:
>
>     ///  2. Calculate mass and scale in loops (\a computeMassInLoops()).


Done.

> > Added: llvm/trunk/test/Analysis/BlockFrequencyInfo/PR23525.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/BlockFrequencyInfo/PR23525.ll?rev=239843&view=auto
> > ==============================================================================
> > --- llvm/trunk/test/Analysis/BlockFrequencyInfo/PR23525.ll (added)
> > +++ llvm/trunk/test/Analysis/BlockFrequencyInfo/PR23525.ll Tue Jun 16 14:10:58 2015
>
> Please rename this testcase to something that describes what you're
> testing.  Maybe irreducible-backedge-mass.ll?
>
> The PR can go in a comment at the top of the file.

Sure, I named it based on what I've seen in other test directories. I
found a couple hundred tests using this naming structure. Is there an
established convention for this?

Actually, I don't even think we need this new test case. I had to
adjust irreducible.ll because the compiler now computes a better
answer for something irreducible.ll was explicitly stating as wrong
before.  Should I just leave the test in irreducible.ll? No point
testing the same thing twice.

> I think you should choose more descriptive label names.

This testcase was generated from a .c file. It has the labels that the
FE generates.

> > -; CHECK-NEXT: left: float = 3.0,
> > +; CHECK-NEXT: left: float = 0.14{{[0-9]*}},
>
> I think you can leave this as:
>
>     CHECK-NEXT: left: float = 0.14

Done.

> > -; CHECK-NEXT: right: float = 3.0,
> > +; CHECK-NEXT: right: float = 0.42{{[0-9]*}},
>
> same here

Done.

> > top:
> > -; CHECK-NEXT: top: float = 3.0,
> > +; CHECK-NEXT: top: float = 8.43{{[0-9]*}},
>
> same here

Done.

Patch attached.  OK for trunk?


Thanks.  Diego.
-------------- next part --------------

diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 0208477..56b8f8f 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -226,7 +226,7 @@ public:
     BlockNode getHeader() const { return Nodes[0]; }
     bool isIrreducible() const { return NumHeaders > 1; }
 
-    HeaderMassList::difference_type headerIndexFor(const BlockNode &B) {
+    HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) {
       assert(isHeader(B) && "this is only valid on loop header blocks");
       if (isIrreducible())
         return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) -
@@ -716,6 +716,17 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
 ///         - Distribute the mass accordingly, dithering to minimize mass loss,
 ///           as described in \a distributeMass().
 ///
+///     In the case of irreducible loops, instead of a single loop header,
+///     there will be several. The computation of backedge masses is similar
+///     but instead of having a single backedge mass, there will be one
+///     backedge per loop header. In these cases, each backedge will carry
+///     a mass proportional to the edge weights along the corresponding
+///     path.
+///
+///     At the end of propagation, the full mass assigned to the loop will be
+///     distributed among the loop headers proportionally according to the
+///     mass flowing through their backedges.
+///
 ///     Finally, calculate the loop scale from the accumulated backedge mass.
 ///
 ///  3. Distribute mass in the function (\a computeMassInFunction()).
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 88fbf7c..c4a7552 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -414,7 +414,7 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
 
     // Check for a backedge.
     if (W.Type == Weight::Backedge) {
-      auto ix = OuterLoop->headerIndexFor(W.TargetNode);
+      auto ix = OuterLoop->getHeaderIndex(W.TargetNode);
       OuterLoop->BackedgeMass[ix] += Taken;
       DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
       continue;
@@ -741,7 +741,7 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
   DEBUG(dbgs() << "adjust-loop-header-mass:\n");
   for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
     auto &HeaderNode = Loop.Nodes[H];
-    auto &BackedgeMass = Loop.BackedgeMass[Loop.headerIndexFor(HeaderNode)];
+    auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
     DEBUG(dbgs() << " - Add back edge mass for node "
                  << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
     Dist.addLocal(HeaderNode, BackedgeMass.getMass());
diff --git a/test/Analysis/BlockFrequencyInfo/PR23525.ll b/test/Analysis/BlockFrequencyInfo/PR23525.ll
deleted file mode 100644
index 43bcd57..0000000
--- a/test/Analysis/BlockFrequencyInfo/PR23525.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: opt < %s -analyze -block-freq | FileCheck %s
-
- at g = global i32 0, align 4
-
-; Function Attrs: inlinehint noinline nounwind uwtable
-define i32 @_Z8hot_loopi(i32 %n) !prof !1 {
-entry:
-  %div = sdiv i32 %n, 2
-  %rem12 = and i32 %n, 1
-  %cmp = icmp eq i32 %rem12, 0
-  br i1 %cmp, label %Next, label %for.cond, !prof !2
-
-; CHECK: - for.cond: float = 25.85{{[0-9]*}}, int = 206
-for.cond:                                         ; preds = %entry, %for.inc
-  %i.0 = phi i32 [ %inc, %for.inc ], [ %div, %entry ]
-  %cmp1 = icmp slt i32 %i.0, %n
-  br i1 %cmp1, label %for.body, label %for.end, !prof !3, !llvm.loop !4
-
-; CHECK: - for.body: float = 24.52, int = 196
-for.body:                                         ; preds = %for.cond
-  %rem213 = and i32 %i.0, 1
-  %cmp3 = icmp eq i32 %rem213, 0
-  br i1 %cmp3, label %if.then.4, label %Next, !prof !6
-
-; CHECK: - if.then.4: float = 12.26{{[0-9]*}}, int = 98
-if.then.4:                                        ; preds = %for.body
-  %0 = load i32, i32* @g, align 4, !tbaa !7
-  %mul = shl nsw i32 %0, 1
-  br label %for.inc
-
-; CHECK: - Next: float = 12.41{{[0-9]*}}, int = 99
-Next:                                             ; preds = %for.body, %entry
-  %i.1 = phi i32 [ %div, %entry ], [ %i.0, %for.body ]
-  %1 = load i32, i32* @g, align 4, !tbaa !7
-  %add = add nsw i32 %1, %n
-  br label %for.inc
-
-; CHECK: - for.inc: float = 38.28{{[0-9]*}}, int = 306
-for.inc:                                          ; preds = %if.then.4, %Next
-  %storemerge = phi i32 [ %add, %Next ], [ %mul, %if.then.4 ]
-  %i.2 = phi i32 [ %i.1, %Next ], [ %i.0, %if.then.4 ]
-  store i32 %storemerge, i32* @g, align 4, !tbaa !7
-  %inc = add nsw i32 %i.2, 1
-  br label %for.cond
-
-; CHECK: - for.end: float = 1.0, int = 8
-for.end:                                          ; preds = %for.cond
-  %2 = load i32, i32* @g, align 4, !tbaa !7
-  ret i32 %2
-}
-
-; Function Attrs: nounwind uwtable
-define i32 @main() !prof !11 {
-entry:
-  br label %for.body
-
-for.cond.cleanup:                                 ; preds = %for.body
-  ret i32 0
-
-for.body:                                         ; preds = %for.body, %entry
-  %i.04 = phi i32 [ 1, %entry ], [ %inc, %for.body ]
-  %call = tail call i32 @_Z8hot_loopi(i32 %i.04)
-  %inc = add nuw nsw i32 %i.04, 1
-  %exitcond = icmp eq i32 %inc, 100
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !12
-}
-
-
-!1 = !{!"function_entry_count", i64 99}
-!2 = !{!"branch_weights", i32 50, i32 51}
-!3 = !{!"branch_weights", i32 2452, i32 100}
-!4 = distinct !{!4, !5}
-!5 = !{!"llvm.loop.unroll.disable"}
-!6 = !{!"branch_weights", i32 1227, i32 1226}
-!7 = !{!8, !8, i64 0}
-!8 = !{!"int", !9, i64 0}
-!9 = !{!"omnipotent char", !10, i64 0}
-!10 = !{!"Simple C/C++ TBAA"}
-!11 = !{!"function_entry_count", i64 1}
-!12 = !{!"branch_weights", i32 2, i32 100}
diff --git a/test/Analysis/BlockFrequencyInfo/irreducible.ll b/test/Analysis/BlockFrequencyInfo/irreducible.ll
index e58b5eb..c1b1c2a 100644
--- a/test/Analysis/BlockFrequencyInfo/irreducible.ll
+++ b/test/Analysis/BlockFrequencyInfo/irreducible.ll
@@ -392,15 +392,15 @@ entry:
   br i1 %x, label %left, label %right, !prof !21
 
 left:
-; CHECK-NEXT: left: float = 0.14{{[0-9]*}},
+; CHECK-NEXT: left: float = 0.14
   br i1 %x, label %top, label %bottom, !prof !22
 
 right:
-; CHECK-NEXT: right: float = 0.42{{[0-9]*}},
+; CHECK-NEXT: right: float = 0.42
   br i1 %x, label %top, label %bottom, !prof !22
 
 top:
-; CHECK-NEXT: top: float = 8.43{{[0-9]*}},
+; CHECK-NEXT: top: float = 8.43
   switch i2 %y, label %exit [ i2 0, label %left
                               i2 1, label %right
                               i2 2, label %bottom ], !prof !23