[llvm] r318693 - Add heuristics for irreducible loop metadata under PGO

Hiroshi Yamauchi via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 20 13:03:38 PST 2017


Author: yamauchi
Date: Mon Nov 20 13:03:38 2017
New Revision: 318693

URL: http://llvm.org/viewvc/llvm-project?rev=318693&view=rev
Log:
Add heuristics for irreducible loop metadata under PGO

Summary:
Add the following heuristics for irreducible loop metadata:

- When an irreducible loop header is missing the loop header weight metadata,
  give it the minimum weight seen among other headers.
- Annotate indirectbr targets with the loop header weight metadata (as they are
  likely to become irreducible loop headers after indirectbr tail duplication).

These greatly improve the accuracy of the block frequency info of the Python
interpreter loop (e.g. from ~3-16x off down to ~40-55% off) and the Python
performance (e.g. unpack_sequence from ~50% slower to ~8% faster than GCC) due to
better register allocation under PGO.

Reviewers: davidxl

Reviewed By: davidxl

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39980

Modified:
    llvm/trunk/include/llvm/Analysis/BlockFrequencyInfoImpl.h
    llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    llvm/trunk/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll
    llvm/trunk/test/Transforms/PGOProfile/irreducible.ll

Modified: llvm/trunk/include/llvm/Analysis/BlockFrequencyInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/BlockFrequencyInfoImpl.h?rev=318693&r1=318692&r2=318693&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/BlockFrequencyInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/BlockFrequencyInfoImpl.h Mon Nov 20 13:03:38 2017
@@ -16,6 +16,7 @@
 #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -1155,35 +1156,56 @@ bool BlockFrequencyInfoImpl<BT>::compute
     DEBUG(dbgs() << "isIrreducible = true\n");
     Distribution Dist;
     unsigned NumHeadersWithWeight = 0;
+    Optional<uint64_t> MinHeaderWeight;
+    DenseSet<uint32_t> HeadersWithoutWeight;
+    HeadersWithoutWeight.reserve(Loop.NumHeaders);
     for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
       auto &HeaderNode = Loop.Nodes[H];
       const BlockT *Block = getBlock(HeaderNode);
       IsIrrLoopHeader.set(Loop.Nodes[H].Index);
       Optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight();
-      if (!HeaderWeight)
+      if (!HeaderWeight) {
+        DEBUG(dbgs() << "Missing irr loop header metadata on "
+              << getBlockName(HeaderNode) << "\n");
+        HeadersWithoutWeight.insert(H);
         continue;
+      }
       DEBUG(dbgs() << getBlockName(HeaderNode)
             << " has irr loop header weight " << HeaderWeight.getValue()
             << "\n");
       NumHeadersWithWeight++;
       uint64_t HeaderWeightValue = HeaderWeight.getValue();
-      if (HeaderWeightValue)
+      if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight)
+        MinHeaderWeight = HeaderWeightValue;
+      if (HeaderWeightValue) {
         Dist.addLocal(HeaderNode, HeaderWeightValue);
-    }
-    if (NumHeadersWithWeight != Loop.NumHeaders) {
-      // Not all headers have a weight metadata. Distribute weight evenly.
-      Dist = Distribution();
-      for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
-        auto &HeaderNode = Loop.Nodes[H];
-        Dist.addLocal(HeaderNode, 1);
       }
     }
+    // As a heuristic, if some headers don't have a weight, give them the
+    // minimum weight seen (not to disrupt the existing trends too much by
+    // using a weight that's in the general range of the other headers' weights,
+    // and the minimum seems to perform better than the average.)
+    // FIXME: better update in the passes that drop the header weight.
+    // If no headers have a weight, give them even weight (use weight 1).
+    if (!MinHeaderWeight)
+      MinHeaderWeight = 1;
+    for (uint32_t H : HeadersWithoutWeight) {
+      auto &HeaderNode = Loop.Nodes[H];
+      const BlockT *Block = getBlock(HeaderNode);
+      assert(!Block->getIrrLoopHeaderWeight() &&
+             "Shouldn't have a weight metadata");
+      uint64_t MinWeight = MinHeaderWeight.getValue();
+      DEBUG(dbgs() << "Giving weight " << MinWeight
+            << " to " << getBlockName(HeaderNode) << "\n");
+      if (MinWeight)
+        Dist.addLocal(HeaderNode, MinWeight);
+    }
     distributeIrrLoopHeaderMass(Dist);
     for (const BlockNode &M : Loop.Nodes)
       if (!propagateMassToSuccessors(&Loop, M))
         llvm_unreachable("unhandled irreducible control flow");
-    if (NumHeadersWithWeight != Loop.NumHeaders)
-      // Not all headers have a weight metadata. Adjust header mass.
+    if (NumHeadersWithWeight == 0)
+      // No headers have weight metadata. Adjust header mass.
       adjustLoopHeaderMass(Loop);
   } else {
     Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();

Modified: llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp?rev=318693&r1=318692&r2=318693&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp Mon Nov 20 13:03:38 2017
@@ -1188,11 +1188,22 @@ void PGOUseFunc::setBranchWeights() {
   }
 }
 
+static bool isIndirectBrTarget(BasicBlock *BB) {
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    if (isa<IndirectBrInst>((*PI)->getTerminator()))
+      return true;
+  }
+  return false;
+}
+
 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
   DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
   // Find irr loop headers
   for (auto &BB : F) {
-    if (BFI->isIrrLoopHeader(&BB)) {
+    // As a heuristic, also annotate indirectbr targets, as they have a high
+    // chance of becoming irreducible loop headers after indirectbr tail
+    // duplication.
+    if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
       TerminatorInst *TI = BB.getTerminator();
       const UseBBInfo &BBCountInfo = getBBInfo(&BB);
       setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);

Modified: llvm/trunk/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll?rev=318693&r1=318692&r2=318693&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll (original)
+++ llvm/trunk/test/Analysis/BlockFrequencyInfo/irreducible_pgo.ll Mon Nov 20 13:03:38 2017
@@ -159,3 +159,68 @@ indirectgoto:
 ; CHECK-NEXT: - sw.default: {{.*}} count = 0
 ; CHECK-NEXT: - exit: {{.*}} count = 1
 ; CHECK-NEXT: - indirectgoto: {{.*}} count = 399, irr_loop_header_weight = 400
+
+; Missing some irr loop annotations.
+; Function Attrs: noinline norecurse nounwind uwtable
+define i32 @_Z11irreduciblePh2(i8* nocapture readonly %p) !prof !27 {
+entry:
+  %0 = load i32, i32* @tracing, align 4
+  %1 = trunc i32 %0 to i8
+  %tobool = icmp eq i32 %0, 0
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %sw.default, %entry
+  br label %dispatch_op
+
+dispatch_op:                                      ; preds = %sw.bb6, %for.cond1
+switch i8 %1, label %sw.default [
+    i8 0, label %sw.bb
+    i8 1, label %dispatch_op.sw.bb6_crit_edge
+    i8 2, label %sw.bb15
+  ], !prof !36
+
+dispatch_op.sw.bb6_crit_edge:                     ; preds = %dispatch_op
+  br label %sw.bb6
+
+sw.bb:                                            ; preds = %indirectgoto, %dispatch_op
+  br label %exit
+
+TARGET_1:                                         ; preds = %indirectgoto
+  br label %sw.bb6
+
+sw.bb6:                                           ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge
+  br i1 %tobool, label %dispatch_op, label %if.then, !prof !37  ; Missing !irr_loop !38
+
+if.then:                                          ; preds = %sw.bb6
+  br label %indirectgoto
+
+TARGET_2:                                         ; preds = %indirectgoto
+  br label %sw.bb15
+
+sw.bb15:                                          ; preds = %TARGET_2, %dispatch_op
+  br i1 %tobool, label %if.then18, label %exit, !prof !39, !irr_loop !40
+
+if.then18:                                        ; preds = %sw.bb15
+  br label %indirectgoto
+
+unknown_op:                                       ; preds = %indirectgoto
+  br label %sw.default
+
+sw.default:                                       ; preds = %unknown_op, %dispatch_op
+  br label %for.cond1
+
+exit:                                             ; preds = %sw.bb15, %sw.bb
+  ret i32 0
+
+indirectgoto:                                     ; preds = %if.then18, %if.then
+  %idxprom21 = zext i32 %0 to i64
+  %arrayidx22 = getelementptr inbounds [256 x i8*], [256 x i8*]* @targets, i64 0, i64 %idxprom21
+  %target = load i8*, i8** %arrayidx22, align 8
+  indirectbr i8* %target, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2], !prof !41, !irr_loop !42
+}
+
+; CHECK-LABEL: Printing analysis {{.*}} for function '_Z11irreduciblePh2':
+; CHECK: block-frequency-info: _Z11irreduciblePh2
+; CHECK: - sw.bb6: {{.*}} count = 100
+; CHECK: - sw.bb15: {{.*}} count = 100, irr_loop_header_weight = 100
+; CHECK: - indirectgoto: {{.*}} count = 400, irr_loop_header_weight = 400

Modified: llvm/trunk/test/Transforms/PGOProfile/irreducible.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PGOProfile/irreducible.ll?rev=318693&r1=318692&r2=318693&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/PGOProfile/irreducible.ll (original)
+++ llvm/trunk/test/Transforms/PGOProfile/irreducible.ll Mon Nov 20 13:03:38 2017
@@ -91,6 +91,7 @@ sw.bb:
 
 TARGET_1:                                         ; preds = %indirectgoto
   br label %sw.bb6
+; USE: br label %sw.bb6, !irr_loop {{.*}}
 
 sw.bb6:                                           ; preds = %TARGET_1, %dispatch_op.sw.bb6_crit_edge
   br i1 %tobool, label %dispatch_op, label %if.then
@@ -102,6 +103,7 @@ if.then:
 
 TARGET_2:                                         ; preds = %indirectgoto
   br label %sw.bb15
+; USE: br label %sw.bb15, !irr_loop {{.*}}
 
 sw.bb15:                                          ; preds = %TARGET_2, %dispatch_op
   br i1 %tobool, label %if.then18, label %exit




More information about the llvm-commits mailing list