[llvm] b40ff5a - [AMDGPU][StructurizeCFG] Maintain branch MD_prof metadata (#109813)

via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 25 04:15:26 PDT 2024


Author: Juan Manuel Martinez Caamaño
Date: 2024-09-25T13:15:23+02:00
New Revision: b40ff5ac2d407074db4479c6e271f51c3f5db4c2

URL: https://github.com/llvm/llvm-project/commit/b40ff5ac2d407074db4479c6e271f51c3f5db4c2
DIFF: https://github.com/llvm/llvm-project/commit/b40ff5ac2d407074db4479c6e271f51c3f5db4c2.diff

LOG: [AMDGPU][StructurizeCFG] Maintain branch MD_prof metadata (#109813)

Currently `StructurizeCFG` drops `branch_weights` metadata.
This metadata can be generated from user annotations in the source code
like:

```cpp
if (...) [[likely]] {
}
```

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
    llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index aca8225cebb3fd..92e47cbc7ae8bf 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
@@ -85,7 +86,43 @@ using PhiMap = MapVector<PHINode *, BBValueVector>;
 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
 
 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
-using BBPredicates = DenseMap<BasicBlock *, Value *>;
+
+using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
+
+class CondBranchWeights {
+  uint32_t TrueWeight;
+  uint32_t FalseWeight;
+
+  CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
+
+public:
+  static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
+    assert(Br.isConditional());
+
+    uint64_t T, F;
+    if (!extractBranchWeights(Br, T, F))
+      return std::nullopt;
+
+    return CondBranchWeights(T, F);
+  }
+
+  static void setMetadata(BranchInst &Br,
+                          const MaybeCondBranchWeights &Weights) {
+    assert(Br.isConditional());
+    if (!Weights)
+      return;
+    uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
+    setBranchWeights(Br, Arr, false);
+  }
+
+  CondBranchWeights invert() const {
+    return CondBranchWeights{FalseWeight, TrueWeight};
+  }
+};
+
+using ValueWeightPair = std::pair<Value *, MaybeCondBranchWeights>;
+
+using BBPredicates = DenseMap<BasicBlock *, ValueWeightPair>;
 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
 
@@ -271,7 +308,7 @@ class StructurizeCFG {
 
   void analyzeLoops(RegionNode *N);
 
-  Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
+  ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
 
   void gatherPredicates(RegionNode *N);
 
@@ -449,16 +486,22 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
 }
 
 /// Build the condition for one edge
-Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
-                                      bool Invert) {
+ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
+                                               bool Invert) {
   Value *Cond = Invert ? BoolFalse : BoolTrue;
+  MaybeCondBranchWeights Weights;
+
   if (Term->isConditional()) {
     Cond = Term->getCondition();
+    Weights = CondBranchWeights::tryParse(*Term);
 
-    if (Idx != (unsigned)Invert)
+    if (Idx != (unsigned)Invert) {
       Cond = invertCondition(Cond);
+      if (Weights)
+        Weights = Weights->invert();
+    }
   }
-  return Cond;
+  return {Cond, Weights};
 }
 
 /// Analyze the predecessors of each block and build up predicates
@@ -490,8 +533,8 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
             if (Visited.count(Other) && !Loops.count(Other) &&
                 !Pred.count(Other) && !Pred.count(P)) {
 
-              Pred[Other] = BoolFalse;
-              Pred[P] = BoolTrue;
+              Pred[Other] = {BoolFalse, std::nullopt};
+              Pred[P] = {BoolTrue, std::nullopt};
               continue;
             }
           }
@@ -512,9 +555,9 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 
       BasicBlock *Entry = R->getEntry();
       if (Visited.count(Entry))
-        Pred[Entry] = BoolTrue;
+        Pred[Entry] = {BoolTrue, std::nullopt};
       else
-        LPred[Entry] = BoolFalse;
+        LPred[Entry] = {BoolFalse, std::nullopt};
     }
   }
 }
@@ -578,12 +621,14 @@ void StructurizeCFG::insertConditions(bool Loops) {
     Dominator.addBlock(Parent);
 
     Value *ParentValue = nullptr;
-    for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
+    MaybeCondBranchWeights ParentWeights = std::nullopt;
+    for (std::pair<BasicBlock *, ValueWeightPair> BBAndPred : Preds) {
       BasicBlock *BB = BBAndPred.first;
-      Value *Pred = BBAndPred.second;
+      auto [Pred, Weight] = BBAndPred.second;
 
       if (BB == Parent) {
         ParentValue = Pred;
+        ParentWeights = Weight;
         break;
       }
       PhiInserter.AddAvailableValue(BB, Pred);
@@ -592,6 +637,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
 
     if (ParentValue) {
       Term->setCondition(ParentValue);
+      CondBranchWeights::setMetadata(*Term, ParentWeights);
     } else {
       if (!Dominator.resultIsRememberedBlock())
         PhiInserter.AddAvailableValue(Dominator.result(), Default);
@@ -607,7 +653,7 @@ void StructurizeCFG::simplifyConditions() {
   for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
     auto &Preds = I.second;
     for (auto &J : Preds) {
-      auto &Cond = J.second;
+      Value *Cond = J.second.first;
       Instruction *Inverted;
       if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
           !Cond->use_empty()) {
@@ -904,9 +950,10 @@ void StructurizeCFG::setPrevNode(BasicBlock *BB) {
 /// Does BB dominate all the predicates of Node?
 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
   BBPredicates &Preds = Predicates[Node->getEntry()];
-  return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
-    return DT->dominates(BB, Pred.first);
-  });
+  return llvm::all_of(Preds,
+                      [&](std::pair<BasicBlock *, ValueWeightPair> Pred) {
+                        return DT->dominates(BB, Pred.first);
+                      });
 }
 
 /// Can we predict that this node will always be called?
@@ -918,9 +965,9 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
   if (!PrevNode)
     return true;
 
-  for (std::pair<BasicBlock*, Value*> Pred : Preds) {
+  for (std::pair<BasicBlock *, ValueWeightPair> Pred : Preds) {
     BasicBlock *BB = Pred.first;
-    Value *V = Pred.second;
+    Value *V = Pred.second.first;
 
     if (V != BoolTrue)
       return false;

diff  --git a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll
index 862c50c6183f16..cdf5ca569701be 100644
--- a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll
+++ b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll
@@ -5,7 +5,7 @@ define amdgpu_ps i32 @if_else(i32 %0) {
 ; OPT-LABEL: define amdgpu_ps i32 @if_else(
 ; OPT-SAME: i32 [[TMP0:%.*]]) {
 ; OPT-NEXT:    [[C:%.*]] = icmp ne i32 [[TMP0]], 0
-; OPT-NEXT:    br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]]
+; OPT-NEXT:    br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]], !prof [[PROF0:![0-9]+]]
 ; OPT:       [[FLOW]]:
 ; OPT-NEXT:    [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ]
 ; OPT-NEXT:    [[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ]
@@ -40,7 +40,7 @@ define amdgpu_ps void @loop_if_break(i32 %n) {
 ; OPT:       [[LOOP]]:
 ; OPT-NEXT:    [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ]
 ; OPT-NEXT:    [[C:%.*]] = icmp ugt i32 [[I]], 0
-; OPT-NEXT:    br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]]
+; OPT-NEXT:    br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]], !prof [[PROF1:![0-9]+]]
 ; OPT:       [[LOOP_BODY]]:
 ; OPT-NEXT:    [[I_NEXT:%.*]] = sub i32 [[I]], 1
 ; OPT-NEXT:    br label %[[FLOW]]
@@ -70,3 +70,7 @@ exit:                                             ; preds = %loop
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 
 !0 = !{!"branch_weights", i32 1000, i32 1}
+;.
+; OPT: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000}
+; OPT: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1}
+;.


        


More information about the llvm-commits mailing list