[llvm] d2500e6 - [pgo] add means to specify "unknown" MD_prof (#145578)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 30 16:57:15 PDT 2025


Author: Mircea Trofin
Date: 2025-06-30T16:57:11-07:00
New Revision: d2500e639b641b0cfdd1564cc6ff4705b118f10c

URL: https://github.com/llvm/llvm-project/commit/d2500e639b641b0cfdd1564cc6ff4705b118f10c
DIFF: https://github.com/llvm/llvm-project/commit/d2500e639b641b0cfdd1564cc6ff4705b118f10c.diff

LOG: [pgo] add means to specify "unknown" MD_prof (#145578)

This PR is part of https://discourse.llvm.org/t/rfc-profile-information-propagation-unittesting/73595

In a slight departure from the RFC, instead of a brand-new `MD_prof_unknown` kind, this adds a first operand to `MD_prof` metadata. This makes it easy to replace with valid metadata (only one `MD_prof`), otherwise sites inserting valid `MD_prof` would also have to check to remove the `unknown` one.

The patch just introduces the notion and fixes the verifier accordingly. Existing APIs working (esp. reading) `MD_prof` will be updated subsequently.

Added: 
    

Modified: 
    llvm/include/llvm/IR/ProfDataUtils.h
    llvm/lib/IR/ProfDataUtils.cpp
    llvm/lib/IR/Verifier.cpp
    llvm/test/Verifier/branch-weight.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index c24b2aa19a407..bc3b046dadb46 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -27,6 +27,7 @@ struct MDProfLabels {
   static const char *FunctionEntryCount;
   static const char *SyntheticFunctionEntryCount;
   static const char *ExpectedBranchWeights;
+  static const char *UnknownBranchWeightsMarker;
 };
 
 /// Checks if an Instruction has MD_prof Metadata
@@ -143,6 +144,18 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I,
 LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
                                bool IsExpected);
 
+/// Specify that the branch weights for this terminator cannot be known at
+/// compile time. This should only be called by passes, and never as a default
+/// behavior in e.g. MDBuilder. The goal is to use this info to validate passes
+/// do not accidentally drop profile info, and this API is called in cases where
+/// the pass explicitly cannot provide that info. Defaulting it in would hide
+/// bugs where the pass forgets to transfer over or otherwise specify profile
+/// info.
+LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I);
+
+LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD);
+LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I);
+
 /// Scaling the profile data attached to 'I' using the ratio of S/T.
 LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T);
 

diff  --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index 605208edda70a..b1b5f67689e6d 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -94,6 +94,7 @@ const char *MDProfLabels::ValueProfile = "VP";
 const char *MDProfLabels::FunctionEntryCount = "function_entry_count";
 const char *MDProfLabels::SyntheticFunctionEntryCount =
     "synthetic_function_entry_count";
+const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown";
 
 bool hasProfMD(const Instruction &I) {
   return I.hasMetadata(LLVMContext::MD_prof);
@@ -241,6 +242,27 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) {
   return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal);
 }
 
+void setExplicitlyUnknownBranchWeights(Instruction &I) {
+  MDBuilder MDB(I.getContext());
+  I.setMetadata(
+      LLVMContext::MD_prof,
+      MDNode::get(I.getContext(),
+                  MDB.createString(MDProfLabels::UnknownBranchWeightsMarker)));
+}
+
+bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) {
+  if (MD.getNumOperands() != 1)
+    return false;
+  return MD.getOperand(0).equalsStr(MDProfLabels::UnknownBranchWeightsMarker);
+}
+
+bool hasExplicitlyUnknownBranchWeights(const Instruction &I) {
+  auto *MD = I.getMetadata(LLVMContext::MD_prof);
+  if (!MD)
+    return false;
+  return isExplicitlyUnknownBranchWeightsMetadata(*MD);
+}
+
 void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
                       bool IsExpected) {
   MDBuilder MDB(I.getContext());

diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 853f3b45fd10c..227afe2b7b61b 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2527,6 +2527,12 @@ void Verifier::verifyFunctionMetadata(
   for (const auto &Pair : MDs) {
     if (Pair.first == LLVMContext::MD_prof) {
       MDNode *MD = Pair.second;
+      if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) {
+        CheckFailed("'unknown' !prof metadata should appear only on "
+                    "instructions supporting the 'branch_weights' metadata",
+                    MD);
+        continue;
+      }
       Check(MD->getNumOperands() >= 2,
             "!prof annotations should have no less than 2 operands", MD);
 
@@ -4989,9 +4995,24 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) {
 }
 
 void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
-  Check(MD->getNumOperands() >= 2,
-        "!prof annotations should have no less than 2 operands", MD);
-
+  auto GetBranchingTerminatorNumOperands = [&]() {
+    unsigned ExpectedNumOperands = 0;
+    if (BranchInst *BI = dyn_cast<BranchInst>(&I))
+      ExpectedNumOperands = BI->getNumSuccessors();
+    else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I))
+      ExpectedNumOperands = SI->getNumSuccessors();
+    else if (isa<CallInst>(&I))
+      ExpectedNumOperands = 1;
+    else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(&I))
+      ExpectedNumOperands = IBI->getNumDestinations();
+    else if (isa<SelectInst>(&I))
+      ExpectedNumOperands = 2;
+    else if (CallBrInst *CI = dyn_cast<CallBrInst>(&I))
+      ExpectedNumOperands = CI->getNumSuccessors();
+    return ExpectedNumOperands;
+  };
+  Check(MD->getNumOperands() >= 1,
+        "!prof annotations should have at least 1 operand", MD);
   // Check first operand.
   Check(MD->getOperand(0) != nullptr, "first operand should not be null", MD);
   Check(isa<MDString>(MD->getOperand(0)),
@@ -4999,6 +5020,19 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
   MDString *MDS = cast<MDString>(MD->getOperand(0));
   StringRef ProfName = MDS->getString();
 
+  if (ProfName == MDProfLabels::UnknownBranchWeightsMarker) {
+    Check(GetBranchingTerminatorNumOperands() != 0 || isa<InvokeInst>(I),
+          "'unknown' !prof should only appear on instructions on which "
+          "'branch_weights' would",
+          MD);
+    Check(MD->getNumOperands() == 1,
+          "'unknown' !prof should have no additional operands", MD);
+    return;
+  }
+
+  Check(MD->getNumOperands() >= 2,
+        "!prof annotations should have no less than 2 operands", MD);
+
   // Check consistency of !prof branch_weights metadata.
   if (ProfName == MDProfLabels::BranchWeights) {
     unsigned NumBranchWeights = getNumBranchWeights(*MD);
@@ -5006,20 +5040,8 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
       Check(NumBranchWeights == 1 || NumBranchWeights == 2,
             "Wrong number of InvokeInst branch_weights operands", MD);
     } else {
-      unsigned ExpectedNumOperands = 0;
-      if (BranchInst *BI = dyn_cast<BranchInst>(&I))
-        ExpectedNumOperands = BI->getNumSuccessors();
-      else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I))
-        ExpectedNumOperands = SI->getNumSuccessors();
-      else if (isa<CallInst>(&I))
-        ExpectedNumOperands = 1;
-      else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(&I))
-        ExpectedNumOperands = IBI->getNumDestinations();
-      else if (isa<SelectInst>(&I))
-        ExpectedNumOperands = 2;
-      else if (CallBrInst *CI = dyn_cast<CallBrInst>(&I))
-        ExpectedNumOperands = CI->getNumSuccessors();
-      else
+      const unsigned ExpectedNumOperands = GetBranchingTerminatorNumOperands();
+      if (ExpectedNumOperands == 0)
         CheckFailed("!prof branch_weights are not allowed for this instruction",
                     MD);
 

diff  --git a/llvm/test/Verifier/branch-weight.ll b/llvm/test/Verifier/branch-weight.ll
index e3b0f340e31bc..4c87a98359019 100644
--- a/llvm/test/Verifier/branch-weight.ll
+++ b/llvm/test/Verifier/branch-weight.ll
@@ -1,21 +1,65 @@
 ; Test MD_prof validation
 
 ; RUN: split-file %s %t
+
 ; RUN: opt -passes=verify %t/valid.ll --disable-output
-; RUN: not opt -passes=verify %t/invalid1.ll --disable-output 2>&1 | FileCheck %s
-; RUN: not opt -passes=verify %t/invalid2.ll --disable-output 2>&1 | FileCheck %s
+
+; RUN: not opt -passes=verify %t/wrong-count.ll --disable-output 2>&1 | FileCheck %s --check-prefix=WRONG-COUNT
+; RUN: not opt -passes=verify %t/invalid-name1.ll --disable-output 2>&1 | FileCheck %s
+; RUN: not opt -passes=verify %t/invalid-name2.ll --disable-output 2>&1 | FileCheck %s
+
+; RUN: opt -passes=verify %t/unknown-correct.ll --disable-output
+
+; RUN: not opt -passes=verify %t/unknown-invalid.ll --disable-output 2>&1 | FileCheck %s --check-prefix=EXTRA-ARGS
+; RUN: not opt -passes=verify %t/unknown-on-function1.ll --disable-output 2>&1 | FileCheck %s --check-prefix=ON-FUNCTION1
+; RUN: not opt -passes=verify %t/unknown-on-function2.ll --disable-output 2>&1 | FileCheck %s --check-prefix=ON-FUNCTION2
+; RUN: not opt -passes=verify %t/invalid-unknown-placement.ll --disable-output 2>&1 | FileCheck %s --check-prefix=INVALID-UNKNOWN-PLACEMENT
 
 ;--- valid.ll
-define void @test(i1 %0) {
-  br i1 %0, label %2, label %3, !prof !0
-2:
+declare void @to_invoke()
+declare i32 @__gxx_personality_v0(...)
+
+define void @invoker() personality ptr @__gxx_personality_v0 {
+  invoke void @to_invoke() to label %exit unwind label %lpad, !prof !0
+lpad:
+  %ll = landingpad {ptr, i32}
+  cleanup
   ret void
-3:
+exit:
   ret void
 }
+
+define i32 @test(i32 %a) {
+  %c = icmp eq i32 %a, 0
+  br i1 %c, label %yes, label %exit, !prof !0
+yes:
+  switch i32 %a, label %exit [ i32 1, label %case_b
+                               i32 2, label %case_c], !prof !1
+case_b:
+  br label %exit
+case_c:
+  br label %exit
+exit:
+  %r = select i1 %c, i32 1, i32 2, !prof !0
+  ret i32 %r
+}
 !0 = !{!"branch_weights", i32 1, i32 2}
+!1 = !{!"branch_weights", i32 1, i32 2, i32 3}
+
+;--- wrong-count.ll
+define void @test(i32 %a) {
+  %c = icmp eq i32 %a, 0
+  br i1 %c, label %yes, label %no, !prof !0
+yes:
+  ret void
+no:
+  ret void
+}
+!0 = !{!"branch_weights", i32 1, i32 2, i32 3}
 
-;--- invalid1.ll
+; WRONG-COUNT: Wrong number of operands
+
+;--- invalid-name1.ll
 define void @test(i1 %0) {
   br i1 %0, label %2, label %3, !prof !0
 2:
@@ -25,7 +69,7 @@ define void @test(i1 %0) {
 }
 !0 = !{!"invalid", i32 1, i32 2}
 
-;--- invalid2.ll
+;--- invalid-name2.ll
 define void @test(i1 %0) {
   br i1 %0, label %2, label %3, !prof !0
 2:
@@ -37,3 +81,71 @@ define void @test(i1 %0) {
 !0 = !{!"function_entry_count", i32 1}
 
 ; CHECK: expected either branch_weights or VP profile name
+
+;--- unknown-correct.ll
+declare void @to_invoke()
+declare i32 @__gxx_personality_v0(...)
+
+define void @invoker() personality ptr @__gxx_personality_v0 {
+  invoke void @to_invoke() to label %exit unwind label %lpad, !prof !0
+lpad:
+  %ll = landingpad {ptr, i32}
+  cleanup
+  ret void
+exit:
+  ret void
+}
+
+define i32 @test(i32 %a) {
+  %c = icmp eq i32 %a, 0
+  br i1 %c, label %yes, label %exit, !prof !0
+yes:
+  switch i32 %a, label %exit [ i32 1, label %case_b
+                               i32 2, label %case_c], !prof !0
+case_b:
+  br label %exit
+case_c:
+  br label %exit
+exit:
+  %r = select i1 %c, i32 1, i32 2, !prof !0
+  ret i32 %r
+}
+
+!0 = !{!"unknown"}
+
+;--- unknown-invalid.ll
+define void @test(i32 %a) {
+  %c = icmp eq i32 %a, 0
+  br i1 %c, label %yes, label %no, !prof !0
+yes:
+  ret void
+no:
+  ret void
+}
+
+!0 = !{!"unknown", i32 12, i32 67}
+; EXTRA-ARGS: 'unknown' !prof should have no additional operands
+
+;--- unknown-on-function1.ll
+define void @test() !prof !0 {
+  ret void
+}
+
+!0 = !{!"unknown"}
+; ON-FUNCTION1: 'unknown' !prof metadata should appear only on instructions supporting the 'branch_weights' metadata
+
+;--- unknown-on-function2.ll
+define void @test() !prof !0 {
+  ret void
+}
+
+!0 = !{!"unknown", i64 123}
+; ON-FUNCTION2: first operand should be 'function_entry_count' or 'synthetic_function_entry_count'
+
+;--- invalid-unknown-placement.ll
+define i32 @test() {
+  %r = add i32 1, 2, !prof !0
+  ret i32 %r
+}
+!0 = !{!"unknown"}
+; INVALID-UNKNOWN-PLACEMENT: 'unknown' !prof should only appear on instructions on which 'branch_weights' would


        


More information about the llvm-commits mailing list