[llvm] [MemProf] Allow cloning of callsites in recursive cycles (PR #121985)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 7 16:32:06 PST 2025
https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/121985
>From 5e409013dac2c5b068974c66bea75828b5dfcbe0 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Tue, 7 Jan 2025 10:55:20 -0800
Subject: [PATCH 1/3] [MemProf] Allow cloning of callsites in recursive cycles
Optionally (by default) no longer mark callsite nodes as Recursive,
which means they would be automatically skipped during cloning. This was
too conservative as it prevents cloning of any callsite that showed up
in any recursive cycle, even for non-recursive contexts.
While this will enable partial cloning of recursive contexts, the
recursive calls themselves will not be updated to call the correct
clone, possibly leading to some unnecessary but benign cloning and
affecting bytes hinted reporting. To prevent this, optional support
looks for recursive cycles in contexts during cloning and removes
those contexts from cloning. This requires some additional runtime
overhead, so is disabled by default for now.
Support for correct cloning of recursive cycles is WIP.
---
.../IPO/MemProfContextDisambiguation.cpp | 48 +++++-
llvm/test/ThinLTO/X86/memprof-recursive.ll | 141 ++++++++++++++++
.../MemProfContextDisambiguation/recursive.ll | 159 ++++++++++++++++++
3 files changed, 343 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/ThinLTO/X86/memprof-recursive.ll
create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 1bf7ff468d782b..a16d858a5a8765 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -122,6 +122,20 @@ static cl::opt<unsigned>
cl::desc("Max depth to recursively search for missing "
"frames through tail calls."));
+// By default enable cloning of callsites involved with recursive cycles
+static cl::opt<bool> SkipRecursiveCallsites(
+ "memprof-skip-recursive-callsites", cl::init(false), cl::Hidden,
+ cl::desc("Prevent cloning of callsites involved in recursive cycles"));
+
+// When enabled, try to detect and prevent cloning of recursive contexts.
+// This is only necessary until we support cloning through recursive cycles.
+// Leave off by default for now, as it requires a little bit of compile time
+// overhead and doesn't affect correctness, it will just inflate the cold hinted
+// bytes reporting a bit when -memprof-report-hinted-sizes is enabled.
+static cl::opt<bool> SkipRecursiveContexts(
+ "memprof-skip-recursive-contexts", cl::init(false), cl::Hidden,
+ cl::desc("Prevent cloning of contexts through recursive cycles"));
+
namespace llvm {
cl::opt<bool> EnableMemProfContextDisambiguation(
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
@@ -1236,9 +1250,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
StackEntryIdToContextNodeMap[StackId] = StackNode;
StackNode->OrigStackOrAllocId = StackId;
}
- auto Ins = StackIdSet.insert(StackId);
- if (!Ins.second)
- StackNode->Recursive = true;
+ // Marking a node recursive will prevent its cloning completely, even for
+ // non-recursive contexts flowing through it.
+ if (SkipRecursiveCallsites) {
+ auto Ins = StackIdSet.insert(StackId);
+ if (!Ins.second)
+ StackNode->Recursive = true;
+ }
StackNode->AllocTypes |= (uint8_t)AllocType;
PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
PrevNode = StackNode;
@@ -1375,8 +1393,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
set_union(CallerEdgeContextIds, Edge->ContextIds);
}
// Node can have more context ids than callers if some contexts terminate at
- // node and some are longer.
- assert(NodeContextIds == CallerEdgeContextIds ||
+ // node and some are longer. If we are not skipping recursive callsites but
+ // haven't enabled skipping of recursive contexts, this will be violated for
+ // incompletely cloned recursive cycles, so skip the checking in that case.
+ assert(!(SkipRecursiveCallsites || SkipRecursiveContexts) ||
+ NodeContextIds == CallerEdgeContextIds ||
set_is_subset(CallerEdgeContextIds, NodeContextIds));
}
if (Node->CalleeEdges.size()) {
@@ -3370,6 +3391,20 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+ DenseSet<uint32_t> RecursiveContextIds;
+ // If we are not skipping recursive callsites, and have also enabled
+ // skipping of recursive contexts, look for context ids that show up in
+ // multiple caller edges.
+ if (!SkipRecursiveCallsites && SkipRecursiveContexts) {
+ DenseSet<uint32_t> AllCallerContextIds;
+ for (auto &CE : Node->CallerEdges) {
+ AllCallerContextIds.reserve(CE->getContextIds().size());
+ for (auto Id : CE->getContextIds())
+ if (!AllCallerContextIds.insert(Id).second)
+ RecursiveContextIds.insert(Id);
+ }
+ }
+
// Iterate until we find no more opportunities for disambiguating the alloc
// types via cloning. In most cases this loop will terminate once the Node
// has a single allocation type, in which case no more cloning is needed.
@@ -3394,6 +3429,9 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
// allocation.
auto CallerEdgeContextsForAlloc =
set_intersection(CallerEdge->getContextIds(), AllocContextIds);
+ if (!RecursiveContextIds.empty())
+ CallerEdgeContextsForAlloc =
+ set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds);
if (CallerEdgeContextsForAlloc.empty()) {
++EI;
continue;
diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll
new file mode 100644
index 00000000000000..38cc1071303802
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll
@@ -0,0 +1,141 @@
+;; Test recursion handling during cloning.
+;;
+;; See llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll for
+;; information on how the test was created.
+
+; RUN: opt -thinlto-bc %s >%t.o
+
+;; By default we should enable cloning of contexts involved with recursive
+;; cycles, but not through the cycle itself. I.e. until full support for
+;; recursion is added, the cloned recursive call from C back to B (line 12) will
+;; not be updated to call a clone.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,_Z1Dv,plx \
+; RUN: -r=%t.o,_Z1Ci,plx \
+; RUN: -r=%t.o,_Z1Bi,plx \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes \
+; RUN: -pass-remarks=memprof-context-disambiguation \
+; RUN: -o %t.out 2>&1 | FileCheck %s \
+; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
+; RUN: --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=NOSKIP-RECUR-CONTEXTS
+
+;; Skipping recursive callsites should result in no cloning.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,_Z1Dv,plx \
+; RUN: -r=%t.o,_Z1Ci,plx \
+; RUN: -r=%t.o,_Z1Bi,plx \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes \
+; RUN: -pass-remarks=memprof-context-disambiguation \
+; RUN: -memprof-skip-recursive-callsites \
+; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \
+; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
+; RUN: --implicit-check-not="created clone" \
+; RUN: --implicit-check-not="marked with memprof allocation attribute cold"
+
+;; Skipping recursive contexts should prevent spurious call to cloned version of
+;; B from the context starting at memprof_recursive.cc:19:13, which is actually
+;; recursive (until that support is added).
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,_Z1Dv,plx \
+; RUN: -r=%t.o,_Z1Ci,plx \
+; RUN: -r=%t.o,_Z1Bi,plx \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes \
+; RUN: -pass-remarks=memprof-context-disambiguation \
+; RUN: -memprof-skip-recursive-contexts \
+; RUN: -o %t.out 2>&1 | FileCheck %s \
+; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
+; RUN: --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS
+
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1
+;; We should only call the cold clone for the recursive context if we haven't
+;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts.
+; NOSKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+; SKIP-RECUR-CONTEXTS-NOT: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define ptr @_Z1Dv() !dbg !3 {
+entry:
+ %call = tail call ptr @_Znam(i64 10), !dbg !6, !memprof !7, !callsite !14
+ ret ptr null
+}
+
+define ptr @_Z1Ci(i32 %n) !dbg !15 {
+entry:
+ %call = tail call ptr @_Z1Dv(), !dbg !16, !callsite !17
+ br label %return
+
+if.end: ; No predecessors!
+ %call1 = tail call ptr @_Z1Bi(i32 0), !dbg !18, !callsite !19
+ br label %return
+
+return: ; preds = %if.end, %entry
+ ret ptr null
+}
+
+define ptr @_Z1Bi(i32 %n) !dbg !20 {
+entry:
+ %call = tail call ptr @_Z1Ci(i32 0), !dbg !21, !callsite !22
+ ret ptr null
+}
+
+define i32 @main() {
+entry:
+ %call = tail call ptr @_Z1Bi(i32 0), !dbg !23, !callsite !25
+ %call1 = tail call ptr @_Z1Bi(i32 0), !dbg !26, !callsite !27
+ %call2 = tail call ptr @_Z1Bi(i32 0), !dbg !28, !callsite !29
+ ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 7aec6dc477f8148ed066d10dfc7a012a51b6599c)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof_recursive.cc", directory: ".", checksumkind: CSK_MD5, checksum: "2f15f63b187a0e0d40e7fdd18b10576a")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "D", linkageName: "_Z1Dv", scope: !1, file: !1, line: 4, type: !4, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!4 = !DISubroutineType(types: !5)
+!5 = !{}
+!6 = !DILocation(line: 5, column: 10, scope: !3)
+!7 = !{!8, !10, !12}
+!8 = !{!9, !"cold"}
+!9 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 6307901912192269588}
+!10 = !{!11, !"notcold"}
+!11 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 -7155190423157709404, i64 -2954124005641725917, i64 8632435727821051414}
+!12 = !{!13, !"cold"}
+!13 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 -7155190423157709404, i64 -2954124005641725917, i64 -3421689549917153178}
+!14 = !{i64 6541423618768552252}
+!15 = distinct !DISubprogram(name: "C", linkageName: "_Z1Ci", scope: !1, file: !1, line: 8, type: !4, scopeLine: 8, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!16 = !DILocation(line: 10, column: 12, scope: !15)
+!17 = !{i64 -200552803509692312}
+!18 = !DILocation(line: 12, column: 10, scope: !15)
+!19 = !{i64 -7155190423157709404}
+!20 = distinct !DISubprogram(name: "B", linkageName: "_Z1Bi", scope: !1, file: !1, line: 14, type: !4, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!21 = !DILocation(line: 15, column: 10, scope: !20)
+!22 = !{i64 -2954124005641725917}
+!23 = !DILocation(line: 18, column: 13, scope: !24)
+!24 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !4, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!25 = !{i64 8632435727821051414}
+!26 = !DILocation(line: 19, column: 13, scope: !24)
+!27 = !{i64 -3421689549917153178}
+!28 = !DILocation(line: 20, column: 13, scope: !24)
+!29 = !{i64 6307901912192269588}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
new file mode 100644
index 00000000000000..42ea59be9269fd
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
@@ -0,0 +1,159 @@
+;; Test recursion handling during cloning.
+;;
+;; Original code looks like:
+;;
+;; #include <stdlib.h>
+;; #include <string.h>
+;; #include <unistd.h>
+;; __attribute((noinline)) char *D() {
+;; return new char[10];
+;; }
+;; __attribute((noinline)) char *B(int n);
+;; __attribute((noinline)) char *C(int n) {
+;; if (!n) {
+;; return D();
+;; }
+;; return B(n-1);
+;; }
+;; __attribute((noinline)) char *B(int n) {
+;; return C(n);
+;; }
+;; int main(int argc, char **argv) {
+;; char *x = B(1);
+;; char *y = B(1);
+;; char *z = B(0);
+;; memset(x, 0, 10);
+;; memset(y, 0, 10);
+;; memset(z, 0, 10);
+;; free(x);
+;; sleep(200);
+;; free(y);
+;; free(z);
+;; return 0;
+;; }
+;;
+;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
+
+;; By default we should enable cloning of contexts involved with recursive
+;; cycles, but not through the cycle itself. I.e. until full support for
+;; recursion is added, the cloned recursive call from C back to B (line 12) will
+;; not be updated to call a clone.
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes \
+; RUN: -pass-remarks=memprof-context-disambiguation \
+; RUN: %s -S 2>&1 | FileCheck %s \
+; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
+; RUN: --check-prefix=ALL --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=NOSKIP-RECUR-CONTEXTS
+
+;; Skipping recursive callsites should result in no cloning.
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes \
+; RUN: -pass-remarks=memprof-context-disambiguation \
+; RUN: -memprof-skip-recursive-callsites \
+; RUN: %s -S 2>&1 | FileCheck %s \
+; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
+; RUN: --implicit-check-not="created clone" \
+; RUN: --implicit-check-not="marked with memprof allocation attribute cold" \
+; RUN: --check-prefix=ALL
+
+;; Skipping recursive contexts should prevent spurious call to cloned version of
+;; B from the context starting at memprof_recursive.cc:19:13, which is actually
+;; recursive (until that support is added).
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes \
+; RUN: -pass-remarks=memprof-context-disambiguation \
+; RUN: -memprof-skip-recursive-contexts \
+; RUN: %s -S 2>&1 | FileCheck %s \
+; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
+; RUN: --check-prefix=ALL --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS
+
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+;; We should only call the cold clone for the recursive context if we haven't
+;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts.
+; NOSKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
+;; We should call the original B for the recursive context if we have
+;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts.
+; SKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci assigned to call function clone _Z1Bi
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:18:13: call in clone main assigned to call function clone _Z1Bi
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi assigned to call function clone _Z1Ci
+; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci assigned to call function clone _Z1Dv
+; ALL: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define ptr @_Z1Dv() !dbg !3 {
+entry:
+ %call = tail call ptr @_Znam(i64 10), !dbg !6, !memprof !7, !callsite !14
+ ret ptr null
+}
+
+define ptr @_Z1Ci(i32 %n) !dbg !15 {
+entry:
+ %call = tail call ptr @_Z1Dv(), !dbg !16, !callsite !17
+ br label %return
+
+if.end: ; No predecessors!
+ %call1 = tail call ptr @_Z1Bi(i32 0), !dbg !18, !callsite !19
+ br label %return
+
+return: ; preds = %if.end, %entry
+ ret ptr null
+}
+
+define ptr @_Z1Bi(i32 %n) !dbg !20 {
+entry:
+ %call = tail call ptr @_Z1Ci(i32 0), !dbg !21, !callsite !22
+ ret ptr null
+}
+
+define i32 @main() {
+entry:
+ %call = tail call ptr @_Z1Bi(i32 0), !dbg !23, !callsite !25
+ %call1 = tail call ptr @_Z1Bi(i32 0), !dbg !26, !callsite !27
+ %call2 = tail call ptr @_Z1Bi(i32 0), !dbg !28, !callsite !29
+ ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 7aec6dc477f8148ed066d10dfc7a012a51b6599c)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof_recursive.cc", directory: ".", checksumkind: CSK_MD5, checksum: "2f15f63b187a0e0d40e7fdd18b10576a")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "D", linkageName: "_Z1Dv", scope: !1, file: !1, line: 4, type: !4, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!4 = !DISubroutineType(types: !5)
+!5 = !{}
+!6 = !DILocation(line: 5, column: 10, scope: !3)
+!7 = !{!8, !10, !12}
+!8 = !{!9, !"cold"}
+!9 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 6307901912192269588}
+!10 = !{!11, !"notcold"}
+!11 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 -7155190423157709404, i64 -2954124005641725917, i64 8632435727821051414}
+!12 = !{!13, !"cold"}
+!13 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 -7155190423157709404, i64 -2954124005641725917, i64 -3421689549917153178}
+!14 = !{i64 6541423618768552252}
+!15 = distinct !DISubprogram(name: "C", linkageName: "_Z1Ci", scope: !1, file: !1, line: 8, type: !4, scopeLine: 8, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!16 = !DILocation(line: 10, column: 12, scope: !15)
+!17 = !{i64 -200552803509692312}
+!18 = !DILocation(line: 12, column: 10, scope: !15)
+!19 = !{i64 -7155190423157709404}
+!20 = distinct !DISubprogram(name: "B", linkageName: "_Z1Bi", scope: !1, file: !1, line: 14, type: !4, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!21 = !DILocation(line: 15, column: 10, scope: !20)
+!22 = !{i64 -2954124005641725917}
+!23 = !DILocation(line: 18, column: 13, scope: !24)
+!24 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !4, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!25 = !{i64 8632435727821051414}
+!26 = !DILocation(line: 19, column: 13, scope: !24)
+!27 = !{i64 -3421689549917153178}
+!28 = !DILocation(line: 20, column: 13, scope: !24)
+!29 = !{i64 6307901912192269588}
>From e4afefc14c4344229efb28c95fd823d556747a87 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Tue, 7 Jan 2025 14:53:43 -0800
Subject: [PATCH 2/3] Address comments
---
.../IPO/MemProfContextDisambiguation.cpp | 18 ++++++++++--------
llvm/test/ThinLTO/X86/memprof-recursive.ll | 2 +-
.../MemProfContextDisambiguation/recursive.ll | 2 +-
3 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index a16d858a5a8765..9ccfcecd88d0d2 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -123,9 +123,9 @@ static cl::opt<unsigned>
"frames through tail calls."));
// By default enable cloning of callsites involved with recursive cycles
-static cl::opt<bool> SkipRecursiveCallsites(
- "memprof-skip-recursive-callsites", cl::init(false), cl::Hidden,
- cl::desc("Prevent cloning of callsites involved in recursive cycles"));
+static cl::opt<bool> AllowRecursiveCallsites(
+ "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
+ cl::desc("Allow cloning of callsites involved in recursive cycles"));
// When enabled, try to detect and prevent cloning of recursive contexts.
// This is only necessary until we support cloning through recursive cycles.
@@ -1252,7 +1252,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
}
// Marking a node recursive will prevent its cloning completely, even for
// non-recursive contexts flowing through it.
- if (SkipRecursiveCallsites) {
+ if (!AllowRecursiveCallsites) {
auto Ins = StackIdSet.insert(StackId);
if (!Ins.second)
StackNode->Recursive = true;
@@ -1393,10 +1393,10 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
set_union(CallerEdgeContextIds, Edge->ContextIds);
}
// Node can have more context ids than callers if some contexts terminate at
- // node and some are longer. If we are not skipping recursive callsites but
+ // node and some are longer. If we are allowing recursive callsites but
// haven't enabled skipping of recursive contexts, this will be violated for
// incompletely cloned recursive cycles, so skip the checking in that case.
- assert(!(SkipRecursiveCallsites || SkipRecursiveContexts) ||
+ assert((AllowRecursiveCallsites && !SkipRecursiveContexts) ||
NodeContextIds == CallerEdgeContextIds ||
set_is_subset(CallerEdgeContextIds, NodeContextIds));
}
@@ -3392,12 +3392,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
assert(Node->AllocTypes != (uint8_t)AllocationType::None);
DenseSet<uint32_t> RecursiveContextIds;
- // If we are not skipping recursive callsites, and have also enabled
+ // If we are allowing recursive callsites, and have also enabled
// skipping of recursive contexts, look for context ids that show up in
// multiple caller edges.
- if (!SkipRecursiveCallsites && SkipRecursiveContexts) {
+ if (AllowRecursiveCallsites && SkipRecursiveContexts) {
DenseSet<uint32_t> AllCallerContextIds;
for (auto &CE : Node->CallerEdges) {
+ // Resize to the largest set of caller context ids, since we know the
+ // final set will be at least that large.
AllCallerContextIds.reserve(CE->getContextIds().size());
for (auto Id : CE->getContextIds())
if (!AllCallerContextIds.insert(Id).second)
diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll
index 38cc1071303802..16fc82eaf391a2 100644
--- a/llvm/test/ThinLTO/X86/memprof-recursive.ll
+++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll
@@ -32,7 +32,7 @@
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -pass-remarks=memprof-context-disambiguation \
-; RUN: -memprof-skip-recursive-callsites \
+; RUN: -memprof-allow-recursive-callsites=false \
; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \
; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
; RUN: --implicit-check-not="created clone" \
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
index 42ea59be9269fd..63221a723edf27 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
@@ -49,7 +49,7 @@
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -pass-remarks=memprof-context-disambiguation \
-; RUN: -memprof-skip-recursive-callsites \
+; RUN: -memprof-allow-recursive-callsites=false \
; RUN: %s -S 2>&1 | FileCheck %s \
; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
; RUN: --implicit-check-not="created clone" \
>From 14e06cc62c63d880c708a2671c8f4d5a743eabce Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Tue, 7 Jan 2025 16:31:10 -0800
Subject: [PATCH 3/3] Change other flag to positive intent too
---
.../IPO/MemProfContextDisambiguation.cpp | 25 +++++++------
llvm/test/ThinLTO/X86/memprof-recursive.ll | 28 +++++++--------
.../MemProfContextDisambiguation/recursive.ll | 36 +++++++++----------
3 files changed, 44 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 9ccfcecd88d0d2..016db55c99c3e5 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -127,14 +127,14 @@ static cl::opt<bool> AllowRecursiveCallsites(
"memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
cl::desc("Allow cloning of callsites involved in recursive cycles"));
-// When enabled, try to detect and prevent cloning of recursive contexts.
+// When disabled, try to detect and prevent cloning of recursive contexts.
// This is only necessary until we support cloning through recursive cycles.
-// Leave off by default for now, as it requires a little bit of compile time
-// overhead and doesn't affect correctness, it will just inflate the cold hinted
-// bytes reporting a bit when -memprof-report-hinted-sizes is enabled.
-static cl::opt<bool> SkipRecursiveContexts(
- "memprof-skip-recursive-contexts", cl::init(false), cl::Hidden,
- cl::desc("Prevent cloning of contexts through recursive cycles"));
+// Leave on by default for now, as disabling requires a little bit of compile
+// time overhead and doesn't affect correctness, it will just inflate the cold
+// hinted bytes reporting a bit when -memprof-report-hinted-sizes is enabled.
+static cl::opt<bool> AllowRecursiveContexts(
+ "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
+ cl::desc("Allow cloning of contexts through recursive cycles"));
namespace llvm {
cl::opt<bool> EnableMemProfContextDisambiguation(
@@ -1394,9 +1394,9 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
}
// Node can have more context ids than callers if some contexts terminate at
// node and some are longer. If we are allowing recursive callsites but
- // haven't enabled skipping of recursive contexts, this will be violated for
+ // haven't disabled recursive contexts, this will be violated for
// incompletely cloned recursive cycles, so skip the checking in that case.
- assert((AllowRecursiveCallsites && !SkipRecursiveContexts) ||
+ assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
NodeContextIds == CallerEdgeContextIds ||
set_is_subset(CallerEdgeContextIds, NodeContextIds));
}
@@ -3392,10 +3392,9 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
assert(Node->AllocTypes != (uint8_t)AllocationType::None);
DenseSet<uint32_t> RecursiveContextIds;
- // If we are allowing recursive callsites, and have also enabled
- // skipping of recursive contexts, look for context ids that show up in
- // multiple caller edges.
- if (AllowRecursiveCallsites && SkipRecursiveContexts) {
+ // If we are allowing recursive callsites, but have also disabled recursive
+ // contexts, look for context ids that show up in multiple caller edges.
+ if (AllowRecursiveCallsites && !AllowRecursiveContexts) {
DenseSet<uint32_t> AllCallerContextIds;
for (auto &CE : Node->CallerEdges) {
// Resize to the largest set of caller context ids, since we know the
diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll
index 16fc82eaf391a2..2b1d7081b7610e 100644
--- a/llvm/test/ThinLTO/X86/memprof-recursive.ll
+++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll
@@ -20,7 +20,7 @@
; RUN: -pass-remarks=memprof-context-disambiguation \
; RUN: -o %t.out 2>&1 | FileCheck %s \
; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
-; RUN: --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=NOSKIP-RECUR-CONTEXTS
+; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS
;; Skipping recursive callsites should result in no cloning.
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
@@ -50,23 +50,23 @@
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -pass-remarks=memprof-context-disambiguation \
-; RUN: -memprof-skip-recursive-contexts \
+; RUN: -memprof-allow-recursive-contexts=false \
; RUN: -o %t.out 2>&1 | FileCheck %s \
; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
-; RUN: --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS
+; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1
-;; We should only call the cold clone for the recursive context if we haven't
-;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts.
-; NOSKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1
+;; We should only call the cold clone for the recursive context if we enabled
+;; recursive contexts via -memprof-allow-recursive-contexts=true (default).
+; ALLOW-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
; SKIP-RECUR-CONTEXTS-NOT: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
index 63221a723edf27..759d5115896c1f 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
@@ -43,7 +43,7 @@
; RUN: -pass-remarks=memprof-context-disambiguation \
; RUN: %s -S 2>&1 | FileCheck %s \
; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
-; RUN: --check-prefix=ALL --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=NOSKIP-RECUR-CONTEXTS
+; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS
;; Skipping recursive callsites should result in no cloning.
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
@@ -62,28 +62,28 @@
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -pass-remarks=memprof-context-disambiguation \
-; RUN: -memprof-skip-recursive-contexts \
+; RUN: -memprof-allow-recursive-contexts=false \
; RUN: %s -S 2>&1 | FileCheck %s \
; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
-; RUN: --check-prefix=ALL --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS
+; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
-;; We should only call the cold clone for the recursive context if we haven't
-;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts.
-; NOSKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+;; We should only call the cold clone for the recursive context if we enabled
+;; recursive contexts via -memprof-allow-recursive-contexts=true (default).
+; ALLOW-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
;; We should call the original B for the recursive context if we have
-;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts.
+;; disabled recursive contexts via -memprof-allow-recursive-contexts=false.
; SKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci assigned to call function clone _Z1Bi
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:18:13: call in clone main assigned to call function clone _Z1Bi
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi assigned to call function clone _Z1Ci
-; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci assigned to call function clone _Z1Dv
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci assigned to call function clone _Z1Bi
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:18:13: call in clone main assigned to call function clone _Z1Bi
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi assigned to call function clone _Z1Ci
+; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci assigned to call function clone _Z1Dv
; ALL: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
More information about the llvm-commits
mailing list