[llvm] [MemProf] Skip unmatched callers when cloning (PR #120455)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 18 12:47:02 PST 2024
https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/120455
>From 9d252207ea3b354811d7633fb5d24a71ddc750c8 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 18 Dec 2024 08:43:25 -0800
Subject: [PATCH 1/2] [MemProf] Skip unmatched callers when cloning
Don't unnecessarily clone for a caller that wasn't matched to a call
instruction.
This necessitated updated a couple of tests that were either
unnecessarily cloning or unnecessarily processing an allocation and
hinting it not cold.
---
.../IPO/MemProfContextDisambiguation.cpp | 7 ++
.../ThinLTO/X86/memprof-missing-callsite.ll | 70 +++++++++++++++++++
.../ThinLTO/X86/memprof-tailcall-nonunique.ll | 32 ++-------
.../fix_clone_checking.ll | 10 +--
4 files changed, 85 insertions(+), 34 deletions(-)
create mode 100644 llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index a6be24601047f7..cd4b9f557fb494 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -3369,6 +3369,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
break;
+ // If the caller was not successfully matched to a call in the IR/summary,
+ // there is no point in trying to clone for it as we can't update that call.
+ if (!CallerEdge->Caller->hasCall()) {
+ ++EI;
+ continue;
+ }
+
// Only need to process the ids along this edge pertaining to the given
// allocation.
auto CallerEdgeContextsForAlloc =
diff --git a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
new file mode 100644
index 00000000000000..6e8cc0f3e58d88
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
@@ -0,0 +1,70 @@
+;; Test callsite context graph generation for simple call graph with
+;; two memprof contexts and no inlining, where one callsite required for
+;; cloning is missing (e.g. unmatched).
+;;
+;; Original code looks like:
+;;
+;; char *foo() {
+;; return new char[10];
+;; }
+;;
+;; int main(int argc, char **argv) {
+;; char *x = foo();
+;; char *y = foo();
+;; memset(x, 0, 10);
+;; memset(y, 0, 10);
+;; delete[] x;
+;; sleep(200);
+;; delete[] y;
+;; return 0;
+;; }
+
+; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -memprof-report-hinted-sizes \
+; RUN: -pass-remarks=memprof-context-disambiguation -save-temps \
+; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \
+; RUN: --check-prefix=SIZESUNHINTED
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not "memprof"="cold"
+
+source_filename = "memprof-missing-callsite.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+ ;; Missing callsite metadata blocks cloning
+ %call = call ptr @_Z3foov()
+ %call1 = call ptr @_Z3foov()
+ ret i32 0
+}
+
+define internal ptr @_Z3foov() #0 {
+entry:
+ %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
+ ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+; uselistorder directives
+uselistorder ptr @_Z3foov, { 1, 0 }
+
+attributes #0 = { noinline optnone }
+
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold", !10}
+!4 = !{i64 9086428284934609951, i64 8632435727821051414}
+!5 = !{!6, !"cold", !11, !12}
+!6 = !{i64 9086428284934609951, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!10 = !{i64 123, i64 100}
+!11 = !{i64 456, i64 200}
+!12 = !{i64 789, i64 300}
+
+; SIZESUNHINTED: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning
+; SIZESUNHINTED: Cold full allocation context 456 with total size 200 is NotColdCold after cloning
+; SIZESUNHINTED: Cold full allocation context 789 with total size 300 is NotColdCold after cloning
diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
index 49c22bf590e6b6..a6863cb878be0c 100644
--- a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
+++ b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
@@ -20,7 +20,9 @@
; RUN: -stats -debug -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG
-; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+;; We should not see any type of cold attribute or cloning applied
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not cold \
+; RUN: --implicit-check-not ".memprof."
;; Try again but with distributed ThinLTO
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
@@ -39,26 +41,23 @@
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG
;; Run ThinLTO backend
+;; We should not see any type of cold attribute or cloning applied
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-import-summary=%t.o.thinlto.bc \
-; RUN: -stats %t.o -S 2>&1 | FileCheck %s --check-prefix=IR
+; RUN: -stats %t.o -S 2>&1 | FileCheck %s --implicit-check-not cold \
+; RUN: --implicit-check-not ".memprof."
; DEBUG: Not found through unique tail call chain: 17377440600225628772 (_Z3barv) from 15822663052811949562 (main) that actually called 8716735811002003409 (xyz) (found multiple possible chains)
; STATS: 1 memprof-context-disambiguation - Number of profiled callees found via multiple tail call chains
-;; Check that all calls in the IR are to the original functions, leading to a
-;; non-cold operator new call.
-
source_filename = "tailcall-nonunique.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline
-; IR-LABEL: @_Z3barv()
define dso_local ptr @_Z3barv() local_unnamed_addr #0 {
entry:
- ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
%call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !9
ret ptr %call
}
@@ -67,54 +66,43 @@ entry:
declare ptr @_Znam(i64) #1
; Function Attrs: noinline
-; IR-LABEL: @_Z5blah1v()
define dso_local ptr @_Z5blah1v() local_unnamed_addr #0 {
entry:
- ; IR: call ptr @_Z3barv()
%call = tail call ptr @_Z3barv()
ret ptr %call
}
; Function Attrs: noinline
-; IR-LABEL: @_Z5blah2v()
define dso_local ptr @_Z5blah2v() local_unnamed_addr #0 {
entry:
- ; IR: call ptr @_Z3barv()
%call = tail call ptr @_Z3barv()
ret ptr %call
}
; Function Attrs: noinline
-; IR-LABEL: @_Z4baz1v()
define dso_local ptr @_Z4baz1v() local_unnamed_addr #0 {
entry:
- ; IR: call ptr @_Z5blah1v()
%call = tail call ptr @_Z5blah1v()
ret ptr %call
}
; Function Attrs: noinline
-; IR-LABEL: @_Z4baz2v()
define dso_local ptr @_Z4baz2v() local_unnamed_addr #0 {
entry:
- ; IR: call ptr @_Z5blah2v()
%call = tail call ptr @_Z5blah2v()
ret ptr %call
}
; Function Attrs: noinline
-; IR-LABEL: @_Z3foob(i1 %b)
define dso_local ptr @_Z3foob(i1 %b) local_unnamed_addr #0 {
entry:
br i1 %b, label %if.then, label %if.else
if.then: ; preds = %entry
- ; IR: call ptr @_Z4baz1v()
%call = tail call ptr @_Z4baz1v()
br label %return
if.else: ; preds = %entry
- ; IR: call ptr @_Z4baz2v()
%call1 = tail call ptr @_Z4baz2v()
br label %return
@@ -124,29 +112,21 @@ return: ; preds = %if.else, %if.then
}
; Function Attrs: noinline
-; IR-LABEL: @xyz()
define dso_local i32 @xyz() local_unnamed_addr #0 {
delete.end13:
- ; IR: call ptr @_Z3foob(i1 true)
%call = tail call ptr @_Z3foob(i1 true)
- ; IR: call ptr @_Z3foob(i1 true)
%call1 = tail call ptr @_Z3foob(i1 true)
- ; IR: call ptr @_Z3foob(i1 false)
%call2 = tail call ptr @_Z3foob(i1 false)
- ; IR: call ptr @_Z3foob(i1 false)
%call3 = tail call ptr @_Z3foob(i1 false)
ret i32 0
}
define dso_local i32 @main() local_unnamed_addr #0 {
delete.end13:
- ; IR: call i32 @xyz()
%call1 = tail call i32 @xyz(), !callsite !11
ret i32 0
}
-; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
-
attributes #0 = { noinline }
attributes #1 = { nobuiltin allocsize(0) }
attributes #2 = { builtin allocsize(0) }
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
index 75cebae0b82971..bcb6ebc51d7bcd 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
@@ -7,14 +7,8 @@
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s
-;; Make sure we created some clones
-; CHECK: created clone A.memprof.1
-; CHECK: created clone C.memprof.1
-; CHECK: created clone D.memprof.1
-; CHECK: created clone E.memprof.1
-; CHECK: created clone B.memprof.1
-; CHECK: created clone F.memprof.1
-; CHECK: created clone G.memprof.1
+;; Make sure we successfully created at least one clone
+; CHECK: created clone {{.*}}.memprof.1
; ModuleID = '<stdin>'
source_filename = "reduced.ll"
>From 6c8748cd5e84fbede9dcb48d5d15e1e4e3aac1e6 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 18 Dec 2024 12:46:40 -0800
Subject: [PATCH 2/2] Add missing escapes
---
llvm/test/ThinLTO/X86/memprof-missing-callsite.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
index 6e8cc0f3e58d88..3179df61a26eb7 100644
--- a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
+++ b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
@@ -28,7 +28,7 @@
; RUN: -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \
; RUN: --check-prefix=SIZESUNHINTED
-; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not "memprof"="cold"
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not \"memprof\"=\"cold\"
source_filename = "memprof-missing-callsite.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
More information about the llvm-commits
mailing list