[llvm] b30c9c9 - LoopUnrollRuntime: Add weights to all branches

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 11 14:25:54 PDT 2023


Author: Matthias Braun
Date: 2023-09-11T14:23:29-07:00
New Revision: b30c9c937802a78ef986cb4219eba51148f76e6c

URL: https://github.com/llvm/llvm-project/commit/b30c9c937802a78ef986cb4219eba51148f76e6c
DIFF: https://github.com/llvm/llvm-project/commit/b30c9c937802a78ef986cb4219eba51148f76e6c.diff

LOG: LoopUnrollRuntime: Add weights to all branches

Make sure every conditional branch constructed by `LoopUnrollRuntime`
code sets branch weights.

- Add new 1:127 weights for the conditional jumps checking whether the
  whole (unrolled) loop should be skipped in the generated prolog or
  epilog code.
- Remove `updateLatchBranchWeightsForRemainderLoop` function and just
  add weights immediately when constructing the relevant branches. This
  leads to simpler code and makes the code more obvious as every call
  to `CreateCondBr` now has a `BranchWeights` parameter.
- Rework the formula for the remainder-loop (prolog/epilog) latch weights
  to assume an equal distribution of remainders, and remove the `assert`
  (as I was able to reach this code when forcing small unroll factors on
  the command line).

Differential Revision: https://reviews.llvm.org/D158642

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
    llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop.ll
    llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 831b4876aed6c1e..1c8850048f6ab19 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -56,6 +56,17 @@ static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
     "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
     cl::desc("Assume the non latch exit block to be predictable"));
 
+// Probability that the loop trip count is so small that after the prolog
+// we do not enter the unrolled loop at all.
+// It is unlikely that the loop trip count is smaller than the unroll factor;
+// other than that, the choice of constant is not tuned yet.
+static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};
+// Probability that the loop trip count is so small that we skip the unrolled
+// loop completely and immediately enter the epilogue loop.
+// It is unlikely that the loop trip count is smaller than the unroll factor;
+// other than that, the choice of constant is not tuned yet.
+static const uint32_t EpilogHeaderWeights[] = {1, 127};
+
 /// Connect the unrolling prolog code to the original loop.
 /// The unrolling prolog code contains code to execute the
 /// 'extra' iterations if the run-time trip count modulo the
@@ -169,7 +180,14 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
   SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
                          nullptr, PreserveLCSSA);
   // Add the branch to the exit block (around the unrolled loop)
-  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
+  MDNode *BranchWeights = nullptr;
+  if (hasBranchWeightMD(*Latch->getTerminator())) {
+    // Assume loop is nearly always entered.
+    MDBuilder MDB(B.getContext());
+    BranchWeights = MDB.createBranchWeights(UnrolledLoopHeaderWeights);
+  }
+  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
+                 BranchWeights);
   InsertPt->eraseFromParent();
   if (DT) {
     auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
@@ -194,8 +212,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
                           BasicBlock *Exit, BasicBlock *PreHeader,
                           BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
                           ValueToValueMapTy &VMap, DominatorTree *DT,
-                          LoopInfo *LI, bool PreserveLCSSA,
-                          ScalarEvolution &SE) {
+                          LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
+                          unsigned Count) {
   BasicBlock *Latch = L->getLoopLatch();
   assert(Latch && "Loop must have a latch");
   BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -292,7 +310,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
   SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
                          PreserveLCSSA);
   // Add the branch to the exit block (around the unrolling loop)
-  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
+  MDNode *BranchWeights = nullptr;
+  if (hasBranchWeightMD(*Latch->getTerminator())) {
+    // Assume equal distribution in interval [0, Count).
+    MDBuilder MDB(B.getContext());
+    BranchWeights = MDB.createBranchWeights(1, Count - 1);
+  }
+  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
   InsertPt->eraseFromParent();
   if (DT) {
     auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
@@ -316,8 +340,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
                 const bool UnrollRemainder,
                 BasicBlock *InsertTop,
                 BasicBlock *InsertBot, BasicBlock *Preheader,
-                std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
-                ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
+                             std::vector<BasicBlock *> &NewBlocks,
+                             LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
+                             DominatorTree *DT, LoopInfo *LI, unsigned Count) {
   StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
@@ -371,7 +396,26 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
       Value *IdxNext =
           Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
       Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
-      Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+      MDNode *BranchWeights = nullptr;
+      if (hasBranchWeightMD(*LatchBR)) {
+        uint32_t ExitWeight;
+        uint32_t BackEdgeWeight;
+        if (Count >= 3) {
+          // Note: We do not enter this loop for zero-remainders. The check
+          // is at the end of the loop. We assume equal distribution between
+          // possible remainders in [1, Count).
+          ExitWeight = 1;
+          BackEdgeWeight = (Count - 2) / 2;
+        } else {
+          // Unnecessary backedge, should never be taken. The conditional
+          // jump should be optimized away later.
+          ExitWeight = 1;
+          BackEdgeWeight = 0;
+        }
+        MDBuilder MDB(Builder.getContext());
+        BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+      }
+      Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
       NewIdx->addIncoming(Zero, InsertTop);
       NewIdx->addIncoming(IdxNext, NewBB);
       LatchBR->eraseFromParent();
@@ -465,32 +509,6 @@ static bool canProfitablyUnrollMultiExitLoop(
   // know of kinds of multiexit loops that would benefit from unrolling.
 }
 
-// Assign the maximum possible trip count as the back edge weight for the
-// remainder loop if the original loop comes with a branch weight.
-static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
-                                                     Loop *RemainderLoop,
-                                                     uint64_t UnrollFactor) {
-  uint64_t TrueWeight, FalseWeight;
-  BranchInst *LatchBR =
-      cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
-  if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight))
-    return;
-  uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
-                            ? FalseWeight
-                            : TrueWeight;
-  assert(UnrollFactor > 1);
-  uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
-  BasicBlock *Header = RemainderLoop->getHeader();
-  BasicBlock *Latch = RemainderLoop->getLoopLatch();
-  auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
-  unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
-  MDBuilder MDB(RemainderLatchBR->getContext());
-  MDNode *WeightNode =
-    HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
-                : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
-  RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-}
-
 /// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
 /// accounting for the possibility of unsigned overflow in the 2s complement
 /// domain. Preconditions:
@@ -776,7 +794,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
   BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
   BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
   // Branch to either remainder (extra iterations) loop or unrolling loop.
-  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
+  MDNode *BranchWeights = nullptr;
+  if (hasBranchWeightMD(*Latch->getTerminator())) {
+    // Assume loop is nearly always entered.
+    MDBuilder MDB(B.getContext());
+    BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
+  }
+  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
   PreHeaderBR->eraseFromParent();
   if (DT) {
     if (UseEpilogRemainder)
@@ -805,12 +829,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
   BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
   Loop *remainderLoop = CloneLoopBlocks(
       L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
-      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
-
-  // Assign the maximum possible trip count as the back edge weight for the
-  // remainder loop if the original loop comes with a branch weight.
-  if (remainderLoop && !UnrollRemainder)
-    updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
+      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
 
   // Insert the cloned blocks into the function.
   F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
@@ -904,7 +923,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
     // Connect the epilog code to the original loop and update the
     // PHI functions.
     ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
-                  NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
+                  NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
 
     // Update counter in loop for unrolling.
     // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
index 82cd5f0d10aec1f..cf875ccdc147ab9 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
@@ -165,7 +165,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP2]], 7
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[INNER_1_HEADER_PROL_PREHEADER:%.*]], label [[INNER_1_HEADER_PROL_LOOPEXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[INNER_1_HEADER_PROL_PREHEADER:%.*]], label [[INNER_1_HEADER_PROL_LOOPEXIT:%.*]], !prof [[PROF3:![0-9]+]]
 ; CHECK:       inner.1.header.prol.preheader:
 ; CHECK-NEXT:    br label [[INNER_1_HEADER_PROL:%.*]]
 ; CHECK:       inner.1.header.prol:
@@ -180,7 +180,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
 ; CHECK-NEXT:    [[CMP_2_PROL:%.*]] = icmp sgt i64 [[INNER_1_IV_PROL]], 0
 ; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
 ; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[INNER_1_HEADER_PROL]], label [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[INNER_1_HEADER_PROL]], label [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !prof [[PROF4:![0-9]+]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       inner.1.header.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[L_1_LCSSA_UNR_PH:%.*]] = phi i32 [ [[L_1_PROL]], [[INNER_1_LATCH_PROL]] ]
 ; CHECK-NEXT:    [[INNER_1_IV_UNR_PH:%.*]] = phi i64 [ [[INNER_1_IV_NEXT_PROL]], [[INNER_1_LATCH_PROL]] ]
@@ -189,7 +189,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
 ; CHECK-NEXT:    [[L_1_LCSSA_UNR:%.*]] = phi i32 [ undef, [[OUTER_HEADER]] ], [ [[L_1_LCSSA_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
 ; CHECK-NEXT:    [[INNER_1_IV_UNR:%.*]] = phi i64 [ [[X]], [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 7
-; CHECK-NEXT:    br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF3]]
 ; CHECK:       outer.header.new:
 ; CHECK-NEXT:    br label [[INNER_1_HEADER:%.*]]
 ; CHECK:       inner.1.header:
@@ -233,7 +233,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
 ; CHECK-NEXT:    store i32 [[L_1_7]], ptr [[DST]], align 8
 ; CHECK-NEXT:    [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV]], 8
 ; CHECK-NEXT:    [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0
-; CHECK-NEXT:    br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]]
 ; CHECK:       outer.middle.unr-lcssa:
 ; CHECK-NEXT:    [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ]
 ; CHECK-NEXT:    br label [[OUTER_MIDDLE]]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
index 00ddfc82feeb4d6..6e3bbe16167e730 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: for.body.epil:
 ; CHECK: br i1 [[COND2:%.*]], label  %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]]
 ; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 2499}
-; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 1, i32 1}
 
 define i3 @test(ptr %a, i3 %n) {
 entry:

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index 15accc8067c8cc8..8acf74a84d2baba 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -18,41 +18,54 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; COMMON-LABEL: @test(
 
-; EPILOG: %xtraiter = and i32 %n
-; EPILOG:  %lcmp.mod = icmp ne i32 %xtraiter, 0
-; EPILOG:  br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+; EPILOG: entry:
+; EPILOG:   br i1 %cmp1, label %for.end, label %for.body.preheader, !prof [[EPILOG_PROF_0:![0-9]+]]
+; EPILOG: for.body.preheader:
+; EPILOG:   %xtraiter = and i32 %n
+; EPILOG:   br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !prof [[EPILOG_PROF_1:![0-9]+]]
+
+; EPILOG: for.end.loopexit.unr-lcssa:
+; EPILOG:   %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG:   br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit, !prof [[EPILOG_PROF_2:![0-9]+]]
 
 ; NOEPILOG-NOT: %xtraiter = and i32 %n
 
-; PROLOG: %xtraiter = and i32 %n
-; PROLOG:  %lcmp.mod = icmp ne i32 %xtraiter, 0
-; PROLOG:  br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+; PROLOG: entry:
+; PROLOG:   br i1 %cmp1, label %for.end, label %for.body.preheader, !prof [[PROLOG_PROF_0:![0-9]+]]
+
+; PROLOG: for.body.preheader:
+; PROLOG:   %xtraiter = and i32 %n
+; PROLOG:   %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG:   br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit, !prof [[PROLOG_PROF_1:![0-9]+]]
 
 ; NOPROLOG-NOT: %xtraiter = and i32 %n
 
 ; EPILOG: for.body.epil:
-; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ],  [ %indvars.iv.unr, %for.body.epil.preheader ]
-; EPILOG:  %epil.iter.next = add i32 %epil.iter, 1
-; EPILOG:  %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
-; EPILOG:  br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
+; EPILOG:   %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ],  [ %indvars.iv.unr, %for.body.epil.preheader ]
+; EPILOG:   %epil.iter.next = add i32 %epil.iter, 1
+; EPILOG:   %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
+; EPILOG:   br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof [[EPILOG_PROF_3:![0-9]+]], !llvm.loop [[EPILOG_LOOP:![0-9]+]]
 
 ; NOEPILOG: for.body:
 ; NOEPILOG-NOT: for.body.epil:
 
 ; PROLOG: for.body.prol:
-; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
-; PROLOG:  %prol.iter.next = add i32 %prol.iter, 1
-; PROLOG:  %prol.iter.cmp = icmp ne i32 %prol.iter.next, %xtraiter
-; PROLOG:  br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
+; PROLOG:   %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
+; PROLOG:   %prol.iter.next = add i32 %prol.iter, 1
+; PROLOG:   %prol.iter.cmp = icmp ne i32 %prol.iter.next, %xtraiter
+; PROLOG:   br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !prof [[PROLOG_PROF_2:![0-9]+]], !llvm.loop [[PROLOG_LOOP:![0-9]+]]
+
+; PROLOG: for.body.prol.loopexit:
+; PROLOG:   br i1 %2, label %for.end.loopexit, label %for.body.preheader.new, !prof [[PROLOG_PROF_1:![0-9]+]]
 
 ; NOPROLOG: for.body:
 ; NOPROLOG-NOT: for.body.prol:
 
 
-define i32 @test(ptr nocapture %a, i32 %n) nounwind uwtable readonly {
+define i32 @test(ptr nocapture %a, i32 %n) nounwind uwtable readonly !prof !2 {
 entry:
   %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body
+  br i1 %cmp1, label %for.end, label %for.body, !prof !3
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
@@ -63,7 +76,7 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.end, label %for.body
+  br i1 %exitcond, label %for.end, label %for.body, !prof !4
 
 for.end:                                          ; preds = %for.body, %entry
   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
@@ -274,12 +287,24 @@ exit2.loopexit:
 
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.unroll.runtime.disable"}
+!2 = !{!"function_entry_count", i64 1}
+!3 = !{!"branch_weights", i32 1, i32 11}
+!4 = !{!"branch_weights", i32 1, i32 42}
 
 ; need to use LABEL here to separate function IR matching from metadata matching
 ; COMMON-LABEL: {{^}}!0 =
 
-; EPILOG-SAME: distinct !{!0, !1}
-; EPILOG: !1 = !{!"llvm.loop.unroll.disable"}
+; EPILOG: [[EPILOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11}
+; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127}
+; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1, i32 7}
+; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 3, i32 1}
+
+; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_LOOP_1:![0-9]+]]}
+; EPILOG: [[EPILOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"}
+
+; PROLOG: [[PROLOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11}
+; PROLOG: [[PROLOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127}
+; PROLOG: [[PROLOG_PROF_2]] = !{!"branch_weights", i32 3, i32 1}
 
-; PROLOG-SAME: distinct !{!0, !1}
-; PROLOG: !1 = !{!"llvm.loop.unroll.disable"}
+; PROLOG: distinct !{[[PROLOG_LOOP]], [[PROLOG_LOOP_1:![0-9]+]]}
+; PROLOG: [[PROLOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"}

diff  --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
index 725b4cebe55f5b2..20a247f3e7490d4 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
@@ -4,13 +4,13 @@
 
 ; CHECK-LABEL: @bar_prof
 ; CHECK: loop:
-; CHECK: %mul = mul
-; CHECK: %mul.1 = mul
-; CHECK: %mul.2 = mul
-; CHECK: %mul.3 = mul
-; CHECK: br i1 %niter.ncmp.7, label %loop.end.unr-lcssa.loopexit, label %loop, !prof !1
+; CHECK:   %mul = mul
+; CHECK:   %mul.1 = mul
+; CHECK:   %mul.2 = mul
+; CHECK:   %mul.3 = mul
+; CHECK:   br i1 %niter.ncmp.7, label %loop.end.unr-lcssa.loopexit, label %loop, !prof [[PROF0:![0-9]+]]
 ; CHECK: loop.epil:
-; CHECK:   br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof !2, !llvm.loop !3
+; CHECK:   br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof [[PROF1:![0-9]+]], !llvm.loop {{![0-9]+}}
 define i32 @bar_prof(ptr noalias nocapture readonly %src, i64 %c) !prof !1 {
 entry:
   br label %loop
@@ -60,5 +60,5 @@ loop.end:
 !1 = !{!"function_entry_count", i64 1}
 !2 = !{!"branch_weights", i32 1, i32 1000}
 
-; CHECK: !1 = !{!"branch_weights", i32 1, i32 124}
-; CHECK: !2 = !{!"branch_weights", i32 7, i32 1}
\ No newline at end of file
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 124}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 1}


        


More information about the llvm-commits mailing list