[llvm] [MemProf] Add option to match summary and definition strictly (PR #83665)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 6 23:23:53 PST 2024


https://github.com/lifengxiang1025 updated https://github.com/llvm/llvm-project/pull/83665

>From 757e1ecb96844bf2c12daeff6a1a829f5c72aae6 Mon Sep 17 00:00:00 2001
From: lifengxiang <lifengxiang.1025 at bytedance.com>
Date: Sat, 2 Mar 2024 16:52:39 +0800
Subject: [PATCH 1/3] [MemProf] Add option to match summary and definition
 strictly

---
 .../IPO/MemProfContextDisambiguation.cpp      |  22 +-
 llvm/test/ThinLTO/X86/summary-matching.ll     | 387 ++++++++++++++++++
 2 files changed, 408 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/ThinLTO/X86/summary-matching.ll

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 271d3ed40030b4..f13d351b36ac43 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -121,6 +121,11 @@ static cl::opt<unsigned>
                         cl::desc("Max depth to recursively search for missing "
                                  "frames through tail calls."));
 
+static cl::opt<bool>
+    StrictSummaryMatch("memprof-strict-summary-match", cl::init(false),
+                       cl::Hidden,
+                       cl::desc("Match summary and definition strictly."));
+
 namespace llvm {
 // Indicate we are linking with an allocator that supports hot/cold operator
 // new interfaces.
@@ -3375,10 +3380,25 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
 
     auto *GVSummary =
         ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
-    if (!GVSummary)
+    if (!GVSummary) {
       // Must have been imported, use the first summary (might be multiple if
       // this was a linkonce_odr).
       GVSummary = TheFnVI.getSummaryList().front().get();
+      if (StrictSummaryMatch) {
+        auto SrcModuleMD = F.getMetadata("thinlto_src_module");
+        assert(SrcModuleMD &&
+               "enable-import-metadata is needed to emit thinlto_src_module");
+        StringRef SrcModule =
+            dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
+        for (auto &GVS : TheFnVI.getSummaryList()) {
+          if (GVS->modulePath() == SrcModule) {
+            GVSummary = GVS.get();
+            break;
+          }
+        }
+        assert(GVSummary->modulePath() == SrcModule);
+      }
+    }
 
     // If this was an imported alias skip it as we won't have the function
     // summary, and it should be cloned in the original module.
diff --git a/llvm/test/ThinLTO/X86/summary-matching.ll b/llvm/test/ThinLTO/X86/summary-matching.ll
new file mode 100644
index 00000000000000..11ed4c7ed3c6c0
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/summary-matching.ll
@@ -0,0 +1,387 @@
+;; Test to make sure that function's definiton and summary matches.
+; RUN: split-file %s %t
+; RUN: opt -thinlto-bc %t/main.ll >%t/main.o
+; RUN: opt -thinlto-bc %t/b.ll >%t/b.o
+; RUN: opt -thinlto-bc %t/c.ll >%t/c.o
+
+; RUN: llvm-lto2 run %t/b.o %t/c.o %t/main.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new -o %t/a.out \
+; RUN: -r=%t/main.o,main,plx \
+; RUN: -r=%t/b.o,_Z1bv,plx \
+; RUN: -r=%t/b.o,_Z3fooIiET_S0_S0_,plx \
+; RUN: -r=%t/b.o,_Znwm \
+; RUN: -r=%t/c.o,_Z1cv,plx \
+; RUN: -r=%t/c.o,_Z3fooIiET_S0_S0_ \
+; RUN: -r=%t/c.o,_Z3barIiET_S0_S0_,plx \
+; RUN: -r=%t/c.o,_Znwm \
+; RUN: -r=%t/main.o,_Z1bv \
+; RUN: -r=%t/main.o,_Z1cv \
+; RUN: -r=%t/main.o,_Z3fooIiET_S0_S0_ -enable-import-metadata=true -memprof-strict-summary-match=true
+
+;; foo has two copys:
+;; foo in b.ll is prevailing and inlines bar.
+;; foo in c.ll isn't prevailing and doesn't inline bar.
+;; main will import foo in c.ll and foo's summary in b.ll default.
+
+;--- main.ll
+; ModuleID = 'main.cc'
+source_filename = "main.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress norecurse uwtable
+define dso_local noundef i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, ptr %retval, align 4
+  %call = call noundef i32 @_Z1bv(), !callsite !6
+  %call1 = call noundef i32 @_Z1cv(), !callsite !7
+  %add = add nsw i32 %call, %call1
+  %call2 = call noundef i32 @_Z3fooIiET_S0_S0_(i32 noundef 1, i32 noundef 2), !callsite !8
+  %add3 = add nsw i32 %add, %call2
+  ret i32 %add3
+}
+
+declare noundef i32 @_Z1bv() #1
+
+declare noundef i32 @_Z1cv() #1
+
+declare noundef i32 @_Z3fooIiET_S0_S0_(i32 noundef, i32 noundef) #1
+
+attributes #0 = { mustprogress norecurse uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 2}
+!5 = !{!"clang version 19.0.0git (https://github.com/lifengxiang1025/llvm-project.git ead0a9777f8ccb5c26d50d96bade6cd5b47f496b)"}
+!6 = !{i64 1}
+!7 = !{i64 5}
+!8 = !{i64 7}
+
+;--- c.ll
+; ModuleID = 'c.cc'
+source_filename = "c.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$_Z3fooIiET_S0_S0_ = comdat any
+
+$_Z3barIiET_S0_S0_ = comdat any
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define dso_local noundef i32 @_Z1cv() #0 {
+entry:
+  %num1 = alloca i32, align 4
+  %num2 = alloca i32, align 4
+  store i32 1, ptr %num1, align 4
+  store i32 1, ptr %num2, align 4
+  %0 = load i32, ptr %num1, align 4
+  %1 = load i32, ptr %num2, align 4
+  %call = call noundef i32 @_Z3fooIiET_S0_S0_(i32 noundef %0, i32 noundef %1), !callsite !6
+  ret i32 %call
+}
+
+; Function Attrs: mustprogress uwtable
+define linkonce_odr dso_local noundef i32 @_Z3fooIiET_S0_S0_(i32 noundef %a, i32 noundef %b) #3 comdat {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %rtn = alloca i32, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 %b, ptr %b.addr, align 4
+  %0 = load i32, ptr %a.addr, align 4
+  %1 = load i32, ptr %b.addr, align 4
+  %call = call noundef i32 @_Z3barIiET_S0_S0_(i32 noundef %0, i32 noundef %1), !callsite !7
+  store i32 %call, ptr %rtn, align 4
+  %2 = load i32, ptr %rtn, align 4
+  ret i32 %2
+}
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define linkonce_odr dso_local noundef i32 @_Z3barIiET_S0_S0_(i32 noundef %a, i32 noundef %b) #0 comdat {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c = alloca ptr, align 8
+  %d = alloca ptr, align 8
+  store i32 %a, ptr %a.addr, align 4
+  store i32 %b, ptr %b.addr, align 4
+  %0 = load i32, ptr %a.addr, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, ptr %a.addr, align 4
+  %1 = load i32, ptr %b.addr, align 4
+  %add1 = add nsw i32 %1, 1
+  store i32 %add1, ptr %b.addr, align 4
+  %2 = load i32, ptr %a.addr, align 4
+  %add2 = add nsw i32 %2, 1
+  store i32 %add2, ptr %a.addr, align 4
+  %3 = load i32, ptr %b.addr, align 4
+  %add3 = add nsw i32 %3, 1
+  store i32 %add3, ptr %b.addr, align 4
+  %4 = load i32, ptr %a.addr, align 4
+  %add4 = add nsw i32 %4, 1
+  store i32 %add4, ptr %a.addr, align 4
+  %5 = load i32, ptr %b.addr, align 4
+  %add5 = add nsw i32 %5, 1
+  store i32 %add5, ptr %b.addr, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %add6 = add nsw i32 %6, 1
+  store i32 %add6, ptr %a.addr, align 4
+  %7 = load i32, ptr %b.addr, align 4
+  %add7 = add nsw i32 %7, 1
+  store i32 %add7, ptr %b.addr, align 4
+  %8 = load i32, ptr %a.addr, align 4
+  %add8 = add nsw i32 %8, 1
+  store i32 %add8, ptr %a.addr, align 4
+  %9 = load i32, ptr %b.addr, align 4
+  %add9 = add nsw i32 %9, 1
+  store i32 %add9, ptr %b.addr, align 4
+  %10 = load i32, ptr %a.addr, align 4
+  %add10 = add nsw i32 %10, 1
+  store i32 %add10, ptr %a.addr, align 4
+  %11 = load i32, ptr %b.addr, align 4
+  %add11 = add nsw i32 %11, 1
+  store i32 %add11, ptr %b.addr, align 4
+  %12 = load i32, ptr %a.addr, align 4
+  %add12 = add nsw i32 %12, 1
+  store i32 %add12, ptr %a.addr, align 4
+  %13 = load i32, ptr %b.addr, align 4
+  %add13 = add nsw i32 %13, 1
+  store i32 %add13, ptr %b.addr, align 4
+  %14 = load i32, ptr %a.addr, align 4
+  %add14 = add nsw i32 %14, 1
+  store i32 %add14, ptr %a.addr, align 4
+  %15 = load i32, ptr %b.addr, align 4
+  %add15 = add nsw i32 %15, 1
+  store i32 %add15, ptr %b.addr, align 4
+  %16 = load i32, ptr %a.addr, align 4
+  %add16 = add nsw i32 %16, 1
+  store i32 %add16, ptr %a.addr, align 4
+  %17 = load i32, ptr %b.addr, align 4
+  %add17 = add nsw i32 %17, 1
+  store i32 %add17, ptr %b.addr, align 4
+  %18 = load i32, ptr %a.addr, align 4
+  %add18 = add nsw i32 %18, 1
+  store i32 %add18, ptr %a.addr, align 4
+  %19 = load i32, ptr %b.addr, align 4
+  %add19 = add nsw i32 %19, 1
+  store i32 %add19, ptr %b.addr, align 4
+  %20 = load i32, ptr %a.addr, align 4
+  %add20 = add nsw i32 %20, 1
+  store i32 %add20, ptr %a.addr, align 4
+  %21 = load i32, ptr %b.addr, align 4
+  %add21 = add nsw i32 %21, 1
+  store i32 %add21, ptr %b.addr, align 4
+  %22 = load i32, ptr %a.addr, align 4
+  %add22 = add nsw i32 %22, 1
+  store i32 %add22, ptr %a.addr, align 4
+  %23 = load i32, ptr %b.addr, align 4
+  %add23 = add nsw i32 %23, 1
+  store i32 %add23, ptr %b.addr, align 4
+  %call = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #2, !callsite !8
+  store i32 1, ptr %call, align 4
+  store ptr %call, ptr %c, align 8
+  %call24 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #2, !callsite !9
+  store i32 1, ptr %call24, align 4
+  store ptr %call24, ptr %d, align 8
+  %24 = load i32, ptr %a.addr, align 4
+  %25 = load i32, ptr %b.addr, align 4
+  %cmp = icmp sgt i32 %24, %25
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %26 = load i32, ptr %a.addr, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %27 = load i32, ptr %b.addr, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %26, %cond.true ], [ %27, %cond.false ]
+  ret i32 %cond
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znwm(i64 noundef) #1
+
+attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nobuiltin allocsize(0) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { builtin allocsize(0) }
+attributes #3 = { mustprogress uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 2}
+!5 = !{!"clang version 19.0.0git (https://github.com/lifengxiang1025/llvm-project.git ead0a9777f8ccb5c26d50d96bade6cd5b47f496b)"}
+!6 = !{i64 6}
+!7 = !{i64 3}
+!8 = !{i64 4}
+!9 = !{i64 9}
+
+;--- b.ll
+; ModuleID = 'b.cc'
+source_filename = "b.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$_Z3fooIiET_S0_S0_ = comdat any
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define dso_local noundef i32 @_Z1bv() #0 {
+entry:
+  %num1 = alloca i32, align 4
+  %num2 = alloca i32, align 4
+  store i32 0, ptr %num1, align 4
+  store i32 0, ptr %num2, align 4
+  %0 = load i32, ptr %num1, align 4
+  %1 = load i32, ptr %num2, align 4
+  %call = call noundef i32 @_Z3fooIiET_S0_S0_(i32 noundef %0, i32 noundef %1), !callsite !6
+  ret i32 %call
+}
+
+; Function Attrs: mustprogress uwtable
+define linkonce_odr dso_local noundef i32 @_Z3fooIiET_S0_S0_(i32 noundef %a, i32 noundef %b) #3 comdat {
+entry:
+  %a.addr.i = alloca i32, align 4
+  %b.addr.i = alloca i32, align 4
+  %c.i = alloca ptr, align 8
+  %d.i = alloca ptr, align 8
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %rtn = alloca i32, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 %b, ptr %b.addr, align 4
+  %0 = load i32, ptr %a.addr, align 4
+  %1 = load i32, ptr %b.addr, align 4
+  store i32 %0, ptr %a.addr.i, align 4
+  store i32 %1, ptr %b.addr.i, align 4
+  %2 = load i32, ptr %a.addr.i, align 4
+  %add.i = add nsw i32 %2, 1
+  store i32 %add.i, ptr %a.addr.i, align 4
+  %3 = load i32, ptr %b.addr.i, align 4
+  %add1.i = add nsw i32 %3, 1
+  store i32 %add1.i, ptr %b.addr.i, align 4
+  %4 = load i32, ptr %a.addr.i, align 4
+  %add2.i = add nsw i32 %4, 1
+  store i32 %add2.i, ptr %a.addr.i, align 4
+  %5 = load i32, ptr %b.addr.i, align 4
+  %add3.i = add nsw i32 %5, 1
+  store i32 %add3.i, ptr %b.addr.i, align 4
+  %6 = load i32, ptr %a.addr.i, align 4
+  %add4.i = add nsw i32 %6, 1
+  store i32 %add4.i, ptr %a.addr.i, align 4
+  %7 = load i32, ptr %b.addr.i, align 4
+  %add5.i = add nsw i32 %7, 1
+  store i32 %add5.i, ptr %b.addr.i, align 4
+  %8 = load i32, ptr %a.addr.i, align 4
+  %add6.i = add nsw i32 %8, 1
+  store i32 %add6.i, ptr %a.addr.i, align 4
+  %9 = load i32, ptr %b.addr.i, align 4
+  %add7.i = add nsw i32 %9, 1
+  store i32 %add7.i, ptr %b.addr.i, align 4
+  %10 = load i32, ptr %a.addr.i, align 4
+  %add8.i = add nsw i32 %10, 1
+  store i32 %add8.i, ptr %a.addr.i, align 4
+  %11 = load i32, ptr %b.addr.i, align 4
+  %add9.i = add nsw i32 %11, 1
+  store i32 %add9.i, ptr %b.addr.i, align 4
+  %12 = load i32, ptr %a.addr.i, align 4
+  %add10.i = add nsw i32 %12, 1
+  store i32 %add10.i, ptr %a.addr.i, align 4
+  %13 = load i32, ptr %b.addr.i, align 4
+  %add11.i = add nsw i32 %13, 1
+  store i32 %add11.i, ptr %b.addr.i, align 4
+  %14 = load i32, ptr %a.addr.i, align 4
+  %add12.i = add nsw i32 %14, 1
+  store i32 %add12.i, ptr %a.addr.i, align 4
+  %15 = load i32, ptr %b.addr.i, align 4
+  %add13.i = add nsw i32 %15, 1
+  store i32 %add13.i, ptr %b.addr.i, align 4
+  %16 = load i32, ptr %a.addr.i, align 4
+  %add14.i = add nsw i32 %16, 1
+  store i32 %add14.i, ptr %a.addr.i, align 4
+  %17 = load i32, ptr %b.addr.i, align 4
+  %add15.i = add nsw i32 %17, 1
+  store i32 %add15.i, ptr %b.addr.i, align 4
+  %18 = load i32, ptr %a.addr.i, align 4
+  %add16.i = add nsw i32 %18, 1
+  store i32 %add16.i, ptr %a.addr.i, align 4
+  %19 = load i32, ptr %b.addr.i, align 4
+  %add17.i = add nsw i32 %19, 1
+  store i32 %add17.i, ptr %b.addr.i, align 4
+  %20 = load i32, ptr %a.addr.i, align 4
+  %add18.i = add nsw i32 %20, 1
+  store i32 %add18.i, ptr %a.addr.i, align 4
+  %21 = load i32, ptr %b.addr.i, align 4
+  %add19.i = add nsw i32 %21, 1
+  store i32 %add19.i, ptr %b.addr.i, align 4
+  %22 = load i32, ptr %a.addr.i, align 4
+  %add20.i = add nsw i32 %22, 1
+  store i32 %add20.i, ptr %a.addr.i, align 4
+  %23 = load i32, ptr %b.addr.i, align 4
+  %add21.i = add nsw i32 %23, 1
+  store i32 %add21.i, ptr %b.addr.i, align 4
+  %24 = load i32, ptr %a.addr.i, align 4
+  %add22.i = add nsw i32 %24, 1
+  store i32 %add22.i, ptr %a.addr.i, align 4
+  %25 = load i32, ptr %b.addr.i, align 4
+  %add23.i = add nsw i32 %25, 1
+  store i32 %add23.i, ptr %b.addr.i, align 4
+  %call.i = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #2, !callsite !7
+  store i32 1, ptr %call.i, align 4
+  store ptr %call.i, ptr %c.i, align 8
+  %call24.i = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #2, !callsite !8
+  store i32 1, ptr %call24.i, align 4
+  store ptr %call24.i, ptr %d.i, align 8
+  %26 = load i32, ptr %a.addr.i, align 4
+  %27 = load i32, ptr %b.addr.i, align 4
+  %cmp.i = icmp sgt i32 %26, %27
+  br i1 %cmp.i, label %cond.true.i, label %cond.false.i
+
+cond.true.i:                                      ; preds = %entry
+  %28 = load i32, ptr %a.addr.i, align 4
+  br label %_Z3barIiET_S0_S0_.exit
+
+cond.false.i:                                     ; preds = %entry
+  %29 = load i32, ptr %b.addr.i, align 4
+  br label %_Z3barIiET_S0_S0_.exit
+
+_Z3barIiET_S0_S0_.exit:                           ; preds = %cond.true.i, %cond.false.i
+  %cond.i = phi i32 [ %28, %cond.true.i ], [ %29, %cond.false.i ]
+  store i32 %cond.i, ptr %rtn, align 4
+  %30 = load i32, ptr %rtn, align 4
+  ret i32 %30
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znwm(i64 noundef) #1
+
+attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nobuiltin allocsize(0) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { builtin allocsize(0) }
+attributes #3 = { mustprogress uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 2}
+!5 = !{!"clang version 19.0.0git (https://github.com/lifengxiang1025/llvm-project.git ead0a9777f8ccb5c26d50d96bade6cd5b47f496b)"}
+!6 = !{i64 2}
+!7 = !{i64 4, i64 3}
+!8 = !{i64 9, i64 3}

>From 85c630d5e6a9475238c76dab3198accd3ea18efe Mon Sep 17 00:00:00 2001
From: lifengxiang <lifengxiang.1025 at bytedance.com>
Date: Sat, 2 Mar 2024 17:32:33 +0800
Subject: [PATCH 2/3] clean testcase

---
 llvm/test/ThinLTO/X86/summary-matching.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/ThinLTO/X86/summary-matching.ll b/llvm/test/ThinLTO/X86/summary-matching.ll
index 11ed4c7ed3c6c0..e1ef8249b7ec61 100644
--- a/llvm/test/ThinLTO/X86/summary-matching.ll
+++ b/llvm/test/ThinLTO/X86/summary-matching.ll
@@ -59,7 +59,7 @@ attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protect
 !2 = !{i32 7, !"PIE Level", i32 2}
 !3 = !{i32 7, !"uwtable", i32 2}
 !4 = !{i32 7, !"frame-pointer", i32 2}
-!5 = !{!"clang version 19.0.0git (https://github.com/lifengxiang1025/llvm-project.git ead0a9777f8ccb5c26d50d96bade6cd5b47f496b)"}
+!5 = !{!"clang version 19.0.0"}
 !6 = !{i64 1}
 !7 = !{i64 5}
 !8 = !{i64 7}
@@ -224,7 +224,7 @@ attributes #3 = { mustprogress uwtable "frame-pointer"="all" "min-legal-vector-w
 !2 = !{i32 7, !"PIE Level", i32 2}
 !3 = !{i32 7, !"uwtable", i32 2}
 !4 = !{i32 7, !"frame-pointer", i32 2}
-!5 = !{!"clang version 19.0.0git (https://github.com/lifengxiang1025/llvm-project.git ead0a9777f8ccb5c26d50d96bade6cd5b47f496b)"}
+!5 = !{!"clang version 19.0.0"}
 !6 = !{i64 6}
 !7 = !{i64 3}
 !8 = !{i64 4}
@@ -381,7 +381,7 @@ attributes #3 = { mustprogress uwtable "frame-pointer"="all" "min-legal-vector-w
 !2 = !{i32 7, !"PIE Level", i32 2}
 !3 = !{i32 7, !"uwtable", i32 2}
 !4 = !{i32 7, !"frame-pointer", i32 2}
-!5 = !{!"clang version 19.0.0git (https://github.com/lifengxiang1025/llvm-project.git ead0a9777f8ccb5c26d50d96bade6cd5b47f496b)"}
+!5 = !{!"clang version 19.0.0"}
 !6 = !{i64 2}
 !7 = !{i64 4, i64 3}
 !8 = !{i64 9, i64 3}

>From ec2db334e68191b7cf6da64ea4d024030beb2b29 Mon Sep 17 00:00:00 2001
From: lifengxiang <lifengxiang.1025 at bytedance.com>
Date: Thu, 7 Mar 2024 15:23:00 +0800
Subject: [PATCH 3/3] remove new flag and do match default

---
 llvm/lib/Transforms/IPO/FunctionImport.cpp    |  8 ++++-
 .../IPO/MemProfContextDisambiguation.cpp      | 32 +++++++------------
 llvm/test/ThinLTO/X86/summary-matching.ll     |  2 +-
 llvm/tools/llvm-link/llvm-link.cpp            |  6 ++++
 4 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 5c7a74dadb46a8..34892e3504bb90 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -163,6 +163,10 @@ static cl::opt<std::string> WorkloadDefinitions(
              "}"),
     cl::Hidden);
 
+namespace llvm {
+extern cl::opt<bool> EnableMemProfContextDisambiguation;
+}
+
 // Load lazily a module from \p FileName in \p Context.
 static std::unique_ptr<Module> loadFile(const std::string &FileName,
                                         LLVMContext &Context) {
@@ -1643,7 +1647,9 @@ Expected<bool> FunctionImporter::importFunctions(
       if (Import) {
         if (Error Err = F.materialize())
           return std::move(Err);
-        if (EnableImportMetadata) {
+        // MemProf should match function's definition and summary,
+        // 'thinlto_src_module' is needed.
+        if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
           // statistics and debugging.
           F.setMetadata(
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index f13d351b36ac43..7ec5e0fc2d4d12 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -121,11 +121,6 @@ static cl::opt<unsigned>
                         cl::desc("Max depth to recursively search for missing "
                                  "frames through tail calls."));
 
-static cl::opt<bool>
-    StrictSummaryMatch("memprof-strict-summary-match", cl::init(false),
-                       cl::Hidden,
-                       cl::desc("Match summary and definition strictly."));
-
 namespace llvm {
 // Indicate we are linking with an allocator that supports hot/cold operator
 // new interfaces.
@@ -3381,23 +3376,20 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
     auto *GVSummary =
         ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
     if (!GVSummary) {
-      // Must have been imported, use the first summary (might be multiple if
-      // this was a linkonce_odr).
-      GVSummary = TheFnVI.getSummaryList().front().get();
-      if (StrictSummaryMatch) {
-        auto SrcModuleMD = F.getMetadata("thinlto_src_module");
-        assert(SrcModuleMD &&
-               "enable-import-metadata is needed to emit thinlto_src_module");
-        StringRef SrcModule =
-            dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
-        for (auto &GVS : TheFnVI.getSummaryList()) {
-          if (GVS->modulePath() == SrcModule) {
-            GVSummary = GVS.get();
-            break;
-          }
+      // Must have been imported, use the summary which matches the definition。
+      // (might be multiple if this was a linkonce_odr).
+      auto SrcModuleMD = F.getMetadata("thinlto_src_module");
+      assert(SrcModuleMD &&
+             "enable-import-metadata is needed to emit thinlto_src_module");
+      StringRef SrcModule =
+          dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
+      for (auto &GVS : TheFnVI.getSummaryList()) {
+        if (GVS->modulePath() == SrcModule) {
+          GVSummary = GVS.get();
+          break;
         }
-        assert(GVSummary->modulePath() == SrcModule);
       }
+      assert(GVSummary && GVSummary->modulePath() == SrcModule);
     }
 
     // If this was an imported alias skip it as we won't have the function
diff --git a/llvm/test/ThinLTO/X86/summary-matching.ll b/llvm/test/ThinLTO/X86/summary-matching.ll
index e1ef8249b7ec61..90e826545ed5e8 100644
--- a/llvm/test/ThinLTO/X86/summary-matching.ll
+++ b/llvm/test/ThinLTO/X86/summary-matching.ll
@@ -16,7 +16,7 @@
 ; RUN: -r=%t/c.o,_Znwm \
 ; RUN: -r=%t/main.o,_Z1bv \
 ; RUN: -r=%t/main.o,_Z1cv \
-; RUN: -r=%t/main.o,_Z3fooIiET_S0_S0_ -enable-import-metadata=true -memprof-strict-summary-match=true
+; RUN: -r=%t/main.o,_Z3fooIiET_S0_S0_ -enable-import-metadata=true 
 
 ;; foo has two copys:
 ;; foo in b.ll is prevailing and inlines bar.
diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp
index 9e7f2c3ebac437..2cd380353df3a1 100644
--- a/llvm/tools/llvm-link/llvm-link.cpp
+++ b/llvm/tools/llvm-link/llvm-link.cpp
@@ -134,6 +134,12 @@ static cl::opt<bool> TryUseNewDbgInfoFormat(
     cl::desc("Enable debuginfo iterator positions, if they're built in"),
     cl::init(false));
 
+namespace llvm {
+cl::opt<bool> EnableMemProfContextDisambiguation(
+    "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
+    cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
+}
+
 extern cl::opt<bool> UseNewDbgInfoFormat;
 
 static ExitOnError ExitOnErr;



More information about the llvm-commits mailing list