[compiler-rt] ac1a1e5 - [ThinLTO][TypeProf] Import local-linkage global var for mod1:func_foo-> mod2:local-var edge (#100448)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 24 18:23:18 PDT 2024


Author: Mingming Liu
Date: 2024-07-24T18:23:14-07:00
New Revision: ac1a1e5797388598201511d17f05aa088ef4a2e2

URL: https://github.com/llvm/llvm-project/commit/ac1a1e5797388598201511d17f05aa088ef4a2e2
DIFF: https://github.com/llvm/llvm-project/commit/ac1a1e5797388598201511d17f05aa088ef4a2e2.diff

LOG: [ThinLTO][TypeProf] Import local-linkage global var for mod1:func_foo-> mod2:local-var edge (#100448)

VTable value profiling can create reference edges from `mod1:func_foo`
to `mod2:local-vtable`. Indirect call profiling can create reference
edges from `mod1:func_foo` to `mod2:local_func_bar`.

Given a ref chain `mod1:func_foo -> mod2:local-var`,`local-var` doesn't
get imported by default.

Compiler checks / requires the module of 'local-var' is the same as the
function that referenced it(`mod1:func_foo`). This is to prevent
mis-compilation when both `mod1` and `mod2` has `local-var` of the same
name, and cpp files are compiled without full path.

This patch allows the import when one of the following conditions
happen:
1) Introduce an option `import-assume-local-unique`. When the compiler
user can guarantee that all files are compiled with full paths, they can
set this option.
2) When there is one instance of value summary.

Test:
* A/B testing this option alone gives -0.16% statistically consistent
cpu cycle reduction on one search workload (no throughput increase)
* Testing it together with existing more-efficient ICP bumps the
throughput increase by a margin (0.05%~0.1%)
* No regressions observed.

Added: 
    

Modified: 
    compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
    llvm/lib/Transforms/IPO/FunctionImport.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
index 411640fc5176d..214c1504db7f4 100644
--- a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
+++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
@@ -5,9 +5,9 @@
 // ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported
 // UNSUPPORTED: ppc && host-byteorder-big-endian
 
-// RUN: rm -rf %t && mkdir %t && cd %t
+// RUN: rm -rf %t && mkdir %t && split-file %s %t && cd %t
 
-// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test
+// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling lib.cpp main.cpp -o test
 // RUN: env LLVM_PROFILE_FILE=test.profraw ./test
 
 // Show vtable profiles from raw profile.
@@ -37,23 +37,23 @@
 // COMMON-NEXT:  Number of instrumented vtables: 2
 // RAW:  Indirect Target Results:
 // RAW-NEXT:       [  0, _ZN8Derived14funcEii,        50 ] (25.00%)
-// RAW-NEXT:       [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii,        150 ] (75.00%)
+// RAW-NEXT:       [  0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii,        150 ] (75.00%)
 // RAW-NEXT:       [  1, _ZN8Derived1D0Ev,        250 ] (25.00%)
-// RAW-NEXT:       [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev,        750 ] (75.00%)
+// RAW-NEXT:       [  1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev,        750 ] (75.00%)
 // RAW-NEXT:  VTable Results:
 // RAW-NEXT:       [  0, _ZTV8Derived1,        50 ] (25.00%)
-// RAW-NEXT:       [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        150 ] (75.00%)
+// RAW-NEXT:       [  0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        150 ] (75.00%)
 // RAW-NEXT:       [  1, _ZTV8Derived1,        250 ] (25.00%)
-// RAW-NEXT:       [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+// RAW-NEXT:       [  1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
 // INDEXED:     Indirect Target Results:
-// INDEXED-NEXT:         [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii,        150 ] (75.00%)
+// INDEXED-NEXT:         [  0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii,        150 ] (75.00%)
 // INDEXED-NEXT:         [  0, _ZN8Derived14funcEii,        50 ] (25.00%)
-// INDEXED-NEXT:         [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev,        750 ] (75.00%)
+// INDEXED-NEXT:         [  1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev,        750 ] (75.00%)
 // INDEXED-NEXT:         [  1, _ZN8Derived1D0Ev,        250 ] (25.00%)
 // INDEXED-NEXT:     VTable Results:
-// INDEXED-NEXT:         [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        150 ] (75.00%)
+// INDEXED-NEXT:         [  0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        150 ] (75.00%)
 // INDEXED-NEXT:         [  0, _ZTV8Derived1,        50 ] (25.00%)
-// INDEXED-NEXT:         [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+// INDEXED-NEXT:         [  1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
 // INDEXED-NEXT:         [  1, _ZTV8Derived1,        250 ] (25.00%)
 // COMMON: Instrumentation level: IR  entry_first = 0
 // COMMON-NEXT: Functions shown: 1
@@ -93,27 +93,27 @@
 // ICTEXT: # NumValueSites:
 // ICTEXT: 2
 // ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
+// ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
 // ICTEXT: _ZN8Derived14funcEii:50
 // ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
+// ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
 // ICTEXT: _ZN8Derived1D0Ev:250
 // ICTEXT: # ValueKind = IPVK_VTableTarget:
 // ICTEXT: 2
 // ICTEXT: # NumValueSites:
 // ICTEXT: 2
 // ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
+// ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
 // ICTEXT: _ZTV8Derived1:50
 // ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
+// ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
 // ICTEXT: _ZTV8Derived1:250
 
 // When vtable value profiles exist, pgo-instr-use pass should annotate them
 // even if `-enable-vtable-value-profiling` is not explicitly on.
 // RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \
 // RUN:   -mllvm -print-after=pgo-instr-use -mllvm -filter-print-funcs=main \
-// RUN:   -mllvm -print-module-scope %s 2>&1 | FileCheck %s --check-prefix=ANNOTATE
+// RUN:   -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | FileCheck %s --check-prefix=ANNOTATE
 
 // ANNOTATE-NOT: Inconsistent number of value sites
 // ANNOTATE: !{!"VP", i32 2
@@ -122,7 +122,7 @@
 // if `-icp-max-num-vtables` is set to zero.
 // RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \
 // RUN:   -mllvm -icp-max-num-vtables=0 -mllvm -print-after=pgo-instr-use \
-// RUN:   -mllvm -filter-print-funcs=main -mllvm -print-module-scope %s 2>&1 | \
+// RUN:   -mllvm -filter-print-funcs=main -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | \
 // RUN:   FileCheck %s --check-prefix=OMIT
 
 // OMIT: Inconsistent number of value sites
@@ -141,28 +141,29 @@
 // RUN:    -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \
 // RUN:    -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \
 // RUN:    -mllvm -enable-vtable-profile-use \
+// RUN:    -Wl,-plugin-opt,-import-assume-unique-local \
 // RUN:    -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \
 // RUN:    -Wl,-mllvm,-print-after=pgo-icall-prom \
-// RUN:    -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \
+// RUN:    -Wl,-mllvm,-filter-print-funcs=main lib.cpp main.cpp 2>&1 \
 // RUN:    | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP"
 
 // For the indirect call site `ptr->func`
-// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
-// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
+// REMARK: main.cpp:10:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}} with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}}
+// REMARK: main.cpp:10:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
 //
 // For the indirect call site `delete ptr`
-// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
-// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
+// REMARK: main.cpp:12:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev.llvm.{{.*}} with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}}
+// REMARK: main.cpp:12:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
 
 // The IR matchers for indirect callsite `ptr->func`.
 // IR-LABEL: @main
 // IR:   [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei
 // IR:   [[VTABLE:%.*]] = load ptr, ptr [[OBJ]]
-// IR:   [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16)
+// IR:   [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}, i32 16)
 // IR:   br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]],
 //
 // IR: [[BB1]]:
-// IR:   [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii
+// IR:   [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}}
 // IR:   br label %[[MERGE0:[a-zA-Z0-9_.]+]]
 //
 // IR: [[BB2]]:
@@ -185,6 +186,7 @@
 // IR: [[MERGE0]]:
 // IR:    [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ]
 
+//--- lib.h
 #include <stdio.h>
 #include <stdlib.h>
 class Base {
@@ -193,12 +195,19 @@ class Base {
 
   virtual ~Base() {};
 };
+
 class Derived1 : public Base {
 public:
-  int func(int a, int b) override { return a * b; }
+  int func(int a, int b) override;
 
   ~Derived1() {}
 };
+
+__attribute__((noinline)) Base *createType(int a);
+
+//--- lib.cpp
+#include "lib.h"
+
 namespace {
 class Derived2 : public Base {
 public:
@@ -207,7 +216,10 @@ class Derived2 : public Base {
   ~Derived2() {}
 };
 } // namespace
-__attribute__((noinline)) Base *createType(int a) {
+
+int Derived1::func(int a, int b) { return a * b; }
+
+Base *createType(int a) {
   Base *base = nullptr;
   if (a % 4 == 0)
     base = new Derived1();
@@ -216,6 +228,9 @@ __attribute__((noinline)) Base *createType(int a) {
   return base;
 }
 
+//--- main.cpp
+#include "lib.h"
+
 int main(int argc, char **argv) {
   int sum = 0;
   for (int i = 0; i < 1000; i++) {

diff  --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 038785114a0cf..547433c280869 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -174,6 +174,17 @@ static cl::opt<std::string> WorkloadDefinitions(
              "}"),
     cl::Hidden);
 
+static cl::opt<bool> ImportAssumeUniqueLocal(
+    "import-assume-unique-local", cl::init(false),
+    cl::desc(
+        "By default, a local-linkage global variable won't be imported in the "
+        "edge mod1:func -> mod2:local-var (from value profiles) since compiler "
+        "cannot assume mod2 is compiled with full path which gives local-var a "
+        "program-wide unique GUID. Set this option to true will help cross "
+        "module import of such variables. This is only safe if the compiler "
+        "user specify the full module path."),
+    cl::Hidden);
+
 namespace llvm {
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
 }
@@ -196,6 +207,23 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
   return Result;
 }
 
+static bool shouldSkipLocalInAnotherModule(const GlobalVarSummary *RefSummary,
+                                           size_t NumDefs,
+                                           StringRef ImporterModule) {
+  // We can import a local from another module if all inputs are compiled
+  // with full paths or when there is one definition.
+  if (ImportAssumeUniqueLocal || NumDefs == 1)
+    return false;
+  // In other cases, make sure we import the copy in the caller's module if the
+  // referenced value has local linkage. The only time a local variable can
+  // share an entry in the index is if there is a local with the same name in
+  // another module that had the same source file name (in a 
diff erent
+  // directory), where each was compiled in their own directory so there was not
+  // distinguishing path.
+  return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
+         RefSummary->modulePath() != ImporterModule;
+}
+
 /// Given a list of possible callee implementation for a call site, qualify the
 /// legality of importing each. The return is a range of pairs. Each pair
 /// corresponds to a candidate. The first value is the ImportFailureReason for
@@ -228,19 +256,21 @@ static auto qualifyCalleeCandidates(
         if (!Summary)
           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
 
-        // If this is a local function, make sure we import the copy
-        // in the caller's module. The only time a local function can
-        // share an entry in the index is if there is a local with the same name
-        // in another module that had the same source file name (in a 
diff erent
-        // directory), where each was compiled in their own directory so there
-        // was not distinguishing path.
-        // However, do the import from another module if there is only one
-        // entry in the list - in that case this must be a reference due
-        // to indirect call profile data, since a function pointer can point to
-        // a local in another module.
-        if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
-            CalleeSummaryList.size() > 1 &&
-            Summary->modulePath() != CallerModulePath)
+        // If this is a local function, make sure we import the copy in the
+        // caller's module. The only time a local function can share an entry in
+        // the index is if there is a local with the same name in another module
+        // that had the same source file name (in a 
diff erent directory), where
+        // each was compiled in their own directory so there was not
+        // distinguishing path.
+        // If the local function is from another module, it must be a reference
+        // due to indirect call profile data since a function pointer can point
+        // to a local in another module. Do the import from another module if
+        // there is only one entry in the list or when all files in the program
+        // are compiled with full path - in both cases the local function has
+        // unique PGO name and GUID.
+        if (shouldSkipLocalInAnotherModule(dyn_cast<GlobalVarSummary>(Summary),
+                                           CalleeSummaryList.size(),
+                                           CallerModulePath))
           return {
               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
               GVSummary};
@@ -359,18 +389,6 @@ class GlobalsImporter final {
 
       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
 
-      // If this is a local variable, make sure we import the copy
-      // in the caller's module. The only time a local variable can
-      // share an entry in the index is if there is a local with the same name
-      // in another module that had the same source file name (in a 
diff erent
-      // directory), where each was compiled in their own directory so there
-      // was not distinguishing path.
-      auto LocalNotInModule =
-          [&](const GlobalValueSummary *RefSummary) -> bool {
-        return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
-               RefSummary->modulePath() != Summary.modulePath();
-      };
-
       for (const auto &RefSummary : VI.getSummaryList()) {
         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
         // Functions could be referenced by global vars - e.g. a vtable; but we
@@ -379,7 +397,8 @@ class GlobalsImporter final {
         // based on profile information). Should we decide to handle them here,
         // we can refactor accordingly at that time.
         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
-            LocalNotInModule(GVS))
+            shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
+                                           Summary.modulePath()))
           continue;
 
         // If there isn't an entry for GUID, insert <GUID, Definition> pair.


        


More information about the llvm-commits mailing list