[llvm] [MemProf] Skip unmatched callers when cloning (PR #120455)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 18 12:47:02 PST 2024


https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/120455

>From 9d252207ea3b354811d7633fb5d24a71ddc750c8 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 18 Dec 2024 08:43:25 -0800
Subject: [PATCH 1/2] [MemProf] Skip unmatched callers when cloning

Don't unnecessarily clone for a caller that wasn't matched to a call
instruction.

This necessitated updated a couple of tests that were either
unnecessarily cloning or unnecessarily processing an allocation and
hinting it not cold.
---
 .../IPO/MemProfContextDisambiguation.cpp      |  7 ++
 .../ThinLTO/X86/memprof-missing-callsite.ll   | 70 +++++++++++++++++++
 .../ThinLTO/X86/memprof-tailcall-nonunique.ll | 32 ++-------
 .../fix_clone_checking.ll                     | 10 +--
 4 files changed, 85 insertions(+), 34 deletions(-)
 create mode 100644 llvm/test/ThinLTO/X86/memprof-missing-callsite.ll

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index a6be24601047f7..cd4b9f557fb494 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -3369,6 +3369,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
     if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
       break;
 
+    // If the caller was not successfully matched to a call in the IR/summary,
+    // there is no point in trying to clone for it as we can't update that call.
+    if (!CallerEdge->Caller->hasCall()) {
+      ++EI;
+      continue;
+    }
+
     // Only need to process the ids along this edge pertaining to the given
     // allocation.
     auto CallerEdgeContextsForAlloc =
diff --git a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
new file mode 100644
index 00000000000000..6e8cc0f3e58d88
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
@@ -0,0 +1,70 @@
+;; Test callsite context graph generation for simple call graph with
+;; two memprof contexts and no inlining, where one callsite required for
+;; cloning is missing (e.g. unmatched).
+;;
+;; Original code looks like:
+;;
+;; char *foo() {
+;;   return new char[10];
+;; }
+;;
+;; int main(int argc, char **argv) {
+;;   char *x = foo();
+;;   char *y = foo();
+;;   memset(x, 0, 10);
+;;   memset(y, 0, 10);
+;;   delete[] x;
+;;   sleep(200);
+;;   delete[] y;
+;;   return 0;
+;; }
+
+; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-memprof-report-hinted-sizes \
+; RUN:	-pass-remarks=memprof-context-disambiguation -save-temps \
+; RUN:	-o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \
+; RUN:  --check-prefix=SIZESUNHINTED
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not "memprof"="cold"
+
+source_filename = "memprof-missing-callsite.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  ;; Missing callsite metadata blocks cloning
+  %call = call ptr @_Z3foov()
+  %call1 = call ptr @_Z3foov()
+  ret i32 0
+}
+
+define internal ptr @_Z3foov() #0 {
+entry:
+  %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+; uselistorder directives
+uselistorder ptr @_Z3foov, { 1, 0 }
+
+attributes #0 = { noinline optnone }
+
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold", !10}
+!4 = !{i64 9086428284934609951, i64 8632435727821051414}
+!5 = !{!6, !"cold", !11, !12}
+!6 = !{i64 9086428284934609951, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!10 = !{i64 123, i64 100}
+!11 = !{i64 456, i64 200}
+!12 = !{i64 789, i64 300}
+
+; SIZESUNHINTED: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning
+; SIZESUNHINTED: Cold full allocation context 456 with total size 200 is NotColdCold after cloning
+; SIZESUNHINTED: Cold full allocation context 789 with total size 300 is NotColdCold after cloning
diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
index 49c22bf590e6b6..a6863cb878be0c 100644
--- a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
+++ b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
@@ -20,7 +20,9 @@
 ; RUN:  -stats -debug -save-temps \
 ; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG
 
-; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+;; We should not see any type of cold attribute or cloning applied
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not cold \
+; RUN:	--implicit-check-not ".memprof."
 
 ;; Try again but with distributed ThinLTO
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
@@ -39,26 +41,23 @@
 ; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG
 
 ;; Run ThinLTO backend
+;; We should not see any type of cold attribute or cloning applied
 ; RUN: opt -passes=memprof-context-disambiguation \
 ; RUN:  -memprof-import-summary=%t.o.thinlto.bc \
-; RUN:  -stats %t.o -S 2>&1 | FileCheck %s --check-prefix=IR
+; RUN:  -stats %t.o -S 2>&1 | FileCheck %s --implicit-check-not cold \
+; RUN:	--implicit-check-not ".memprof."
 
 ; DEBUG: Not found through unique tail call chain: 17377440600225628772 (_Z3barv) from 15822663052811949562 (main) that actually called 8716735811002003409 (xyz) (found multiple possible chains)
 
 ; STATS: 1 memprof-context-disambiguation - Number of profiled callees found via multiple tail call chains
 
-;; Check that all calls in the IR are to the original functions, leading to a
-;; non-cold operator new call.
-
 source_filename = "tailcall-nonunique.cc"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 ; Function Attrs: noinline
-; IR-LABEL: @_Z3barv()
 define dso_local ptr @_Z3barv() local_unnamed_addr #0 {
 entry:
-  ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
   %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !9
   ret ptr %call
 }
@@ -67,54 +66,43 @@ entry:
 declare ptr @_Znam(i64) #1
 
 ; Function Attrs: noinline
-; IR-LABEL: @_Z5blah1v()
 define dso_local ptr @_Z5blah1v() local_unnamed_addr #0 {
 entry:
-  ; IR: call ptr @_Z3barv()
   %call = tail call ptr @_Z3barv()
   ret ptr %call
 }
 
 ; Function Attrs: noinline
-; IR-LABEL: @_Z5blah2v()
 define dso_local ptr @_Z5blah2v() local_unnamed_addr #0 {
 entry:
-  ; IR: call ptr @_Z3barv()
   %call = tail call ptr @_Z3barv()
   ret ptr %call
 }
 
 ; Function Attrs: noinline
-; IR-LABEL: @_Z4baz1v()
 define dso_local ptr @_Z4baz1v() local_unnamed_addr #0 {
 entry:
-  ; IR: call ptr @_Z5blah1v()
   %call = tail call ptr @_Z5blah1v()
   ret ptr %call
 }
 
 ; Function Attrs: noinline
-; IR-LABEL: @_Z4baz2v()
 define dso_local ptr @_Z4baz2v() local_unnamed_addr #0 {
 entry:
-  ; IR: call ptr @_Z5blah2v()
   %call = tail call ptr @_Z5blah2v()
   ret ptr %call
 }
 
 ; Function Attrs: noinline
-; IR-LABEL: @_Z3foob(i1 %b)
 define dso_local ptr @_Z3foob(i1 %b) local_unnamed_addr #0 {
 entry:
   br i1 %b, label %if.then, label %if.else
 
 if.then:                                          ; preds = %entry
-  ; IR: call ptr @_Z4baz1v()
   %call = tail call ptr @_Z4baz1v()
   br label %return
 
 if.else:                                          ; preds = %entry
-  ; IR: call ptr @_Z4baz2v()
   %call1 = tail call ptr @_Z4baz2v()
   br label %return
 
@@ -124,29 +112,21 @@ return:                                           ; preds = %if.else, %if.then
 }
 
 ; Function Attrs: noinline
-; IR-LABEL: @xyz()
 define dso_local i32 @xyz() local_unnamed_addr #0 {
 delete.end13:
-  ; IR: call ptr @_Z3foob(i1 true)
   %call = tail call ptr @_Z3foob(i1 true)
-  ; IR: call ptr @_Z3foob(i1 true)
   %call1 = tail call ptr @_Z3foob(i1 true)
-  ; IR: call ptr @_Z3foob(i1 false)
   %call2 = tail call ptr @_Z3foob(i1 false)
-  ; IR: call ptr @_Z3foob(i1 false)
   %call3 = tail call ptr @_Z3foob(i1 false)
   ret i32 0
 }
 
 define dso_local i32 @main() local_unnamed_addr #0 {
 delete.end13:
-  ; IR: call i32 @xyz()
   %call1 = tail call i32 @xyz(), !callsite !11
   ret i32 0
 }
 
-; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
-
 attributes #0 = { noinline }
 attributes #1 = { nobuiltin allocsize(0) }
 attributes #2 = { builtin allocsize(0) }
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
index 75cebae0b82971..bcb6ebc51d7bcd 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
@@ -7,14 +7,8 @@
 ; RUN:		-memprof-verify-ccg -memprof-verify-nodes \
 ; RUN: 		-pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s
 
-;; Make sure we created some clones
-; CHECK: created clone A.memprof.1
-; CHECK: created clone C.memprof.1
-; CHECK: created clone D.memprof.1
-; CHECK: created clone E.memprof.1
-; CHECK: created clone B.memprof.1
-; CHECK: created clone F.memprof.1
-; CHECK: created clone G.memprof.1
+;; Make sure we successfully created at least one clone
+; CHECK: created clone {{.*}}.memprof.1
 
 ; ModuleID = '<stdin>'
 source_filename = "reduced.ll"

>From 6c8748cd5e84fbede9dcb48d5d15e1e4e3aac1e6 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 18 Dec 2024 12:46:40 -0800
Subject: [PATCH 2/2] Add missing escapes

---
 llvm/test/ThinLTO/X86/memprof-missing-callsite.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
index 6e8cc0f3e58d88..3179df61a26eb7 100644
--- a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
+++ b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
@@ -28,7 +28,7 @@
 ; RUN:	-pass-remarks=memprof-context-disambiguation -save-temps \
 ; RUN:	-o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \
 ; RUN:  --check-prefix=SIZESUNHINTED
-; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not "memprof"="cold"
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not \"memprof\"=\"cold\"
 
 source_filename = "memprof-missing-callsite.ll"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"



More information about the llvm-commits mailing list