[llvm] Revert "[MemProf] Ensure all callsite clones are assigned a function clone" (PR #150856)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 27 15:44:38 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-lto

Author: Teresa Johnson (teresajohnson)

<details>
<summary>Changes</summary>

Reverts llvm/llvm-project#<!-- -->150735 due to bot failures that I need to investigate.

---
Full diff: https://github.com/llvm/llvm-project/pull/150856.diff


3 Files Affected:

- (modified) llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp (+12-84) 
- (removed) llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll (-145) 
- (removed) llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll (-130) 


``````````diff
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 81de0a59321d3..0164fcd71419e 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -97,8 +97,6 @@ STATISTIC(MissingAllocForContextId,
           "Number of missing alloc nodes for context ids");
 STATISTIC(SkippedCallsCloning,
           "Number of calls skipped during cloning due to unexpected operand");
-STATISTIC(MismatchedCloneAssignments,
-          "Number of callsites assigned to call multiple non-matching clones");
 
 static cl::opt<std::string> DotFilePathPrefix(
     "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -2062,20 +2060,6 @@ static bool isMemProfClone(const Function &F) {
   return F.getName().contains(MemProfCloneSuffix);
 }
 
-// Return the clone number of the given function by extracting it from the
-// memprof suffix. Assumes the caller has already confirmed it is a memprof
-// clone.
-static unsigned getMemProfCloneNum(const Function &F) {
-  assert(isMemProfClone(F));
-  auto Pos = F.getName().find_last_of('.');
-  assert(Pos > 0);
-  unsigned CloneNo;
-  bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
-  assert(!Err);
-  (void)Err;
-  return CloneNo;
-}
-
 std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
                                                  const Instruction *Call,
                                                  unsigned CloneNo) const {
@@ -3995,22 +3979,7 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
 
 void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                             FuncInfo CalleeFunc) {
-  auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction();
-  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
-  if (isMemProfClone(*CurF)) {
-    // If we already assigned this callsite to call a specific non-default
-    // clone (i.e. not the original function which is clone 0), ensure that we
-    // aren't trying to now update it to call a different clone, which is
-    // indicative of a bug in the graph or function assignment.
-    auto CurCalleeCloneNo = getMemProfCloneNum(*CurF);
-    if (CurCalleeCloneNo != NewCalleeCloneNo) {
-      LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
-                        << CurCalleeCloneNo << " now " << NewCalleeCloneNo
-                        << "\n");
-      MismatchedCloneAssignments++;
-    }
-  }
-  if (NewCalleeCloneNo > 0)
+  if (CalleeFunc.cloneNo() > 0)
     cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
   OREGetter(CallerCall.call()->getFunction())
       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
@@ -4026,19 +3995,7 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
   assert(CI &&
          "Caller cannot be an allocation which should not have profiled calls");
   assert(CI->Clones.size() > CallerCall.cloneNo());
-  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
-  auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
-  // If we already assigned this callsite to call a specific non-default
-  // clone (i.e. not the original function which is clone 0), ensure that we
-  // aren't trying to now update it to call a different clone, which is
-  // indicative of a bug in the graph or function assignment.
-  if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
-    LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
-                      << CurCalleeCloneNo << " now " << NewCalleeCloneNo
-                      << "\n");
-    MismatchedCloneAssignments++;
-  }
-  CurCalleeCloneNo = NewCalleeCloneNo;
+  CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
 }
 
 // Update the debug information attached to NewFunc to use the clone Name. Note
@@ -4746,19 +4703,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
           // where the callers were assigned to different clones of a function.
         }
 
-        auto FindFirstAvailFuncClone = [&]() {
-          // Find first function in FuncClonesToCallMap without an assigned
-          // clone of this callsite Node. We should always have one
-          // available at this point due to the earlier cloning when the
-          // FuncClonesToCallMap size was smaller than the clone number.
-          for (auto &CF : FuncClonesToCallMap) {
-            if (!FuncCloneToCurNodeCloneMap.count(CF.first))
-              return CF.first;
-          }
-          assert(false &&
-                 "Expected an available func clone for this callsite clone");
-        };
-
         // See if we can use existing function clone. Walk through
         // all caller edges to see if any have already been assigned to
         // a clone of this callsite's function. If we can use it, do so. If not,
@@ -4875,7 +4819,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
             // clone of OrigFunc for another caller during this iteration over
             // its caller edges.
             if (!FuncCloneAssignedToCurCallsiteClone) {
-              FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
+              // Find first function in FuncClonesToCallMap without an assigned
+              // clone of this callsite Node. We should always have one
+              // available at this point due to the earlier cloning when the
+              // FuncClonesToCallMap size was smaller than the clone number.
+              for (auto &CF : FuncClonesToCallMap) {
+                if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
+                  FuncCloneAssignedToCurCallsiteClone = CF.first;
+                  break;
+                }
+              }
               assert(FuncCloneAssignedToCurCallsiteClone);
               // Assign Clone to FuncCloneAssignedToCurCallsiteClone
               AssignCallsiteCloneToFuncClone(
@@ -4889,31 +4842,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
                                        FuncCloneAssignedToCurCallsiteClone);
           }
         }
-        // If we didn't assign a function clone to this callsite clone yet, e.g.
-        // none of its callers has a non-null call, do the assignment here.
-        // We want to ensure that every callsite clone is assigned to some
-        // function clone, so that the call updates below work as expected.
-        // In particular if this is the original callsite, we want to ensure it
-        // is assigned to the original function, otherwise the original function
-        // will appear available for assignment to other callsite clones,
-        // leading to unintended effects. For one, the unknown and not updated
-        // callers will call into cloned paths leading to the wrong hints,
-        // because they still call the original function (clone 0). Also,
-        // because all callsites start out as being clone 0 by default, we can't
-        // easily distinguish between callsites explicitly assigned to clone 0
-        // vs those never assigned, which can lead to multiple updates of the
-        // calls when invoking updateCall below, with mismatched clone values.
-        // TODO: Add a flag to the callsite nodes or some other mechanism to
-        // better distinguish and identify callsite clones that are not getting
-        // assigned to function clones as expected.
-        if (!FuncCloneAssignedToCurCallsiteClone) {
-          FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
-          assert(FuncCloneAssignedToCurCallsiteClone &&
-                 "No available func clone for this callsite clone");
-          AssignCallsiteCloneToFuncClone(
-              FuncCloneAssignedToCurCallsiteClone, Call, Clone,
-              /*IsAlloc=*/AllocationCallToContextNodeMap.contains(Call));
-        }
       }
       if (VerifyCCG) {
         checkNode<DerivedCCG, FuncTy, CallTy>(Node);
diff --git a/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll b/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll
deleted file mode 100644
index bd358509bcb3a..0000000000000
--- a/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll
+++ /dev/null
@@ -1,145 +0,0 @@
-;; Make sure we assign the original callsite to a function clone (which will be
-;; the original function clone), even when we cannot update its caller (due to
-;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use
-;; the original function for a different clone, leading to confusion later when
-;; rewriting the calls.
-
-;; -stats requires asserts
-; REQUIRES: asserts
-
-; RUN: opt -thinlto-bc %s >%t.o
-; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
-; RUN:  -supports-hot-cold-new \
-; RUN:  -r=%t.o,A,plx \
-; RUN:  -r=%t.o,B,plx \
-; RUN:  -r=%t.o,C,plx \
-; RUN:  -r=%t.o,D,plx \
-; RUN:  -r=%t.o,E,plx \
-; RUN:  -r=%t.o,F,plx \
-; RUN:  -r=%t.o,G,plx \
-; RUN:  -r=%t.o,A1,plx \
-; RUN:  -r=%t.o,B1,plx \
-; RUN:  -r=%t.o,_Znwm, \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -debug-only=memprof-context-disambiguation \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
-; RUN:  -o %t.out 2>&1 | FileCheck %s \
-; RUN:	--implicit-check-not="Mismatch in call clone assignment" \
-; RUN:	--implicit-check-not="Number of callsites assigned to call multiple non-matching clones"
-
-; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
-
-; ModuleID = '<stdin>'
-source_filename = "reduced.ll"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-grtev4-linux-gnu"
-
-; IR-LABEL: define dso_local void @A()
-define void @A() #0 {
-  ; IR: call void @C()
-  call void @C()
-  ret void
-}
-
-; IR-LABEL: define dso_local void @B()
-define void @B() #0 {
-  ; IR: call void @C.memprof.1()
-  call void @C(), !callsite !1
-  ret void
-}
-
-; IR-LABEL: define dso_local void @C()
-define void @C() #0 {
-  ; IR: call void @F()
-  call void @F(), !callsite !16
-  ; IR: call void @D()
-  call void @D(), !callsite !2
-  ret void
-}
-
-; IR-LABEL: define dso_local void @D()
-define void @D() #0 {
-  ; IR: call void @E()
-  call void @E(), !callsite !3
-  ; IR: call void @G()
-  call void @G(), !callsite !17
-  ret void
-}
-
-; IR-LABEL: define dso_local void @E()
-define void @E() #0 {
-  ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]]
-  %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9
-  ret void
-}
-
-; IR-LABEL: define dso_local void @F()
-define void @F() #0 {
-  ; IR: call void @G()
-  call void @G(), !callsite !17
-  ret void
-}
-
-; IR-LABEL: define dso_local void @G()
-define void @G() #0 {
-  ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD]]
-  %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15
-  ret void
-}
-
-; IR-LABEL: define dso_local void @A1()
-define void @A1() #0 {
-  ; IR: call void @C()
-  call void @C(), !callsite !18
-  ret void
-}
-
-; IR-LABEL: define dso_local void @B1()
-define void @B1() #0 {
-  ; IR: call void @C.memprof.1()
-  call void @C(), !callsite !19
-  ret void
-}
-
-; IR-LABEL: define dso_local void @C.memprof.1()
-  ; IR: call void @F.memprof.1()
-  ; IR: call void @D.memprof.1()
-
-; IR-LABEL: define dso_local void @D.memprof.1()
-  ; IR: call void @E.memprof.1()
-  ; IR: call void @G()
-
-; IR-LABEL: define dso_local void @E.memprof.1()
-  ; IR: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]]
-
-; IR-LABEL: define dso_local void @F.memprof.1()
-  ; IR: call void @G.memprof.1()
-
-; IR-LABEL: define dso_local void @G.memprof.1()
-  ; IR: call ptr @_Znwm(i64 0) #[[COLD]]
-
-declare ptr @_Znwm(i64)
-
-attributes #0 = { noinline optnone }
-; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
-; IR: attributes #[[COLD]] = { "memprof"="cold" }
-
-!0 = !{i64 123}
-!1 = !{i64 234}
-!2 = !{i64 345}
-!3 = !{i64 456}
-!4 = !{!5, !7}
-!5 = !{!6, !"notcold"}
-!6 = !{i64 567, i64 456, i64 345, i64 123}
-!7 = !{!8, !"cold"}
-!8 = !{i64 567, i64 456, i64 345, i64 234}
-!9 = !{i64 567}
-!10 = !{!11, !13}
-!11 = !{!12, !"notcold"}
-!12 = !{i64 678, i64 891, i64 789, i64 912}
-!13 = !{!14, !"cold"}
-!14 = !{i64 678, i64 891, i64 789, i64 812}
-!15 = !{i64 678}
-!16 = !{i64 789}
-!17 = !{i64 891}
-!18 = !{i64 912}
-!19 = !{i64 812}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll b/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll
deleted file mode 100644
index 29c58c9c87ed9..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll
+++ /dev/null
@@ -1,130 +0,0 @@
-;; Make sure we assign the original callsite to a function clone (which will be
-;; the original function clone), even when we cannot update its caller (due to
-;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use
-;; the original function for a different clone, leading to confusion later when
-;; rewriting the calls.
-
-;; -stats requires asserts
-; REQUIRES: asserts
-
-; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
-; RUN:		-memprof-verify-ccg -memprof-verify-nodes -stats -debug \
-; RUN: 		-pass-remarks=memprof-context-disambiguation %s -S 2>&1 | \
-; RUN:	FileCheck %s --implicit-check-not="Mismatch in call clone assignment" \
-; RUN:		--implicit-check-not="Number of callsites assigned to call multiple non-matching clones"
-
-
-; ModuleID = '<stdin>'
-source_filename = "reduced.ll"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-grtev4-linux-gnu"
-
-; CHECK-LABEL: define void @A()
-define void @A() {
-  ; CHECK: call void @C()
-  call void @C()
-  ret void
-}
-
-; CHECK-LABEL: define void @B()
-define void @B() {
-  ; CHECK: call void @C.memprof.1()
-  call void @C(), !callsite !1
-  ret void
-}
-
-; CHECK-LABEL: define void @C()
-define void @C() {
-  ; CHECK: call void @F()
-  call void @F(), !callsite !16
-  ; CHECK: call void @D()
-  call void @D(), !callsite !2
-  ret void
-}
-
-; CHECK-LABEL: define void @D()
-define void @D() {
-  ; CHECK: call void @E()
-  call void @E(), !callsite !3
-  ; CHECK: call void @G()
-  call void @G(), !callsite !17
-  ret void
-}
-
-; CHECK-LABEL: define void @E()
-define void @E() {
-  ; CHECK: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]]
-  %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9
-  ret void
-}
-
-; CHECK-LABEL: define void @F()
-define void @F() {
-  ; CHECK: call void @G()
-  call void @G(), !callsite !17
-  ret void
-}
-
-; CHECK-LABEL: define void @G()
-define void @G() {
-  ; CHECK: call ptr @_Znwm(i64 0) #[[NOTCOLD]]
-  %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15
-  ret void
-}
-
-; CHECK-LABEL: define void @A1()
-define void @A1() {
-  ; CHECK: call void @C()
-  call void @C(), !callsite !18
-  ret void
-}
-
-; CHECK-LABEL: define void @B1()
-define void @B1() {
-  ; CHECK: call void @C.memprof.1()
-  call void @C(), !callsite !19
-  ret void
-}
-
-; CHECK-LABEL: define void @C.memprof.1()
-  ; CHECK: call void @F.memprof.1()
-  ; CHECK: call void @D.memprof.1()
-
-; CHECK-LABEL: define void @D.memprof.1()
-  ; CHECK: call void @E.memprof.1()
-  ; CHECK: call void @G()
-
-; CHECK-LABEL: define void @E.memprof.1()
-  ; CHECK: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]]
-
-; CHECK-LABEL: define void @F.memprof.1()
-  ; CHECK: call void @G.memprof.1()
-
-; CHECK-LABEL: define void @G.memprof.1()
-  ; CHECK: call ptr @_Znwm(i64 0) #[[COLD]]
-
-declare ptr @_Znwm(i64)
-
-; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
-; IR: attributes #[[COLD]] = { "memprof"="cold" }
-
-!0 = !{i64 123}
-!1 = !{i64 234}
-!2 = !{i64 345}
-!3 = !{i64 456}
-!4 = !{!5, !7}
-!5 = !{!6, !"notcold"}
-!6 = !{i64 567, i64 456, i64 345, i64 123}
-!7 = !{!8, !"cold"}
-!8 = !{i64 567, i64 456, i64 345, i64 234}
-!9 = !{i64 567}
-!10 = !{!11, !13}
-!11 = !{!12, !"notcold"}
-!12 = !{i64 678, i64 891, i64 789, i64 912}
-!13 = !{!14, !"cold"}
-!14 = !{i64 678, i64 891, i64 789, i64 812}
-!15 = !{i64 678}
-!16 = !{i64 789}
-!17 = !{i64 891}
-!18 = !{i64 912}
-!19 = !{i64 812}

``````````

</details>


https://github.com/llvm/llvm-project/pull/150856


More information about the llvm-commits mailing list