[llvm] 04c3040 - [InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 1 13:43:29 PST 2021


Author: Fangrui Song
Date: 2021-03-01T13:43:23-08:00
New Revision: 04c3040f417683e7c31b3ee3381a3263106f48c5

URL: https://github.com/llvm/llvm-project/commit/04c3040f417683e7c31b3ee3381a3263106f48c5
DIFF: https://github.com/llvm/llvm-project/commit/04c3040f417683e7c31b3ee3381a3263106f48c5.diff

LOG: [InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF

`__llvm_prf_vnodes` and `__llvm_prf_names` are used by the runtime but are not
referenced via relocation in the translation unit.

With `-z start-stop-gc` (D96914 https://sourceware.org/bugzilla/show_bug.cgi?id=27451),
the linker no longer lets `__start_/__stop_` references retain them.

Place `__llvm_prf_vnodes` and `__llvm_prf_names` in `llvm.used` to make
them retained by the linker.

This patch changes most existing `UsedVars` cases to `CompilerUsedVars`
to reflect the ideal state - if the binary format properly supports
section based GC (dead stripping), `llvm.compiler.used` should be sufficient.

`__llvm_prf_vnodes` and `__llvm_prf_names` are switched to `UsedVars`
since we want them to be unconditionally retained by both compiler and linker.

Behavior on the other binary formats (COFF/Mach-O) is not affected.

Differential Revision: https://reviews.llvm.org/D97649

Added: 
    compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c
    compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c

Modified: 
    compiler-rt/test/profile/Linux/instrprof-value-merge.c
    llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
    llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
    llvm/test/Instrumentation/InstrProfiling/icall.ll

Removed: 
    


################################################################################
diff  --git a/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c
new file mode 100644
index 000000000000..a23bc484943e
--- /dev/null
+++ b/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c
@@ -0,0 +1,70 @@
+#include <string.h>
+
+void (*f0)();
+void (*f1)();
+void (*f2)();
+
+char dst[200];
+char src[200];
+volatile int n;
+
+__attribute__((noinline)) void foo() {}
+
+__attribute__((noinline)) void bar() {
+  f0 = foo;
+  f1 = foo;
+  f2 = foo;
+  n = 4;
+}
+int main(int argc, char *argv[]) {
+  int i;
+  bar();
+  if (argc == 1) {
+    f0();
+    for (i = 0; i < 9; i++)
+      f1();
+    for (i = 0; i < 99; i++)
+      f2();
+  } else {
+    memcpy((void *)dst, (void *)src, n);
+    for (i = 0; i < 6; i++)
+      memcpy((void *)(dst + 2), (void *)src, n + 1);
+    for (i = 0; i < 66; i++)
+      memcpy((void *)(dst + 9), (void *)src, n + 2);
+  }
+}
+
+// CHECK: Counters:
+// CHECK:   main:
+// CHECK:     Hash: 0x0a9bd81e87ab6e87
+// CHECK:     Counters: 6
+// CHECK:     Indirect Call Site Count: 3
+// CHECK:     Number of Memory Intrinsics Calls: 3
+// CHECK:     Block counts: [27, 297, 12, 132, 3, 2]
+// CHECK:     Indirect Target Results:
+// CHECK:         [ 0, foo, 3 ]
+// CHECK:         [ 1, foo, 27 ]
+// CHECK:         [ 2, foo, 297 ]
+// CHECK:     Memory Intrinsic Size Results:
+// CHECK:         [ 0, 4, 2 ]
+// CHECK:         [ 1, 5, 12 ]
+// CHECK:         [ 2, 6, 132 ]
+// CHECK: Instrumentation level: IR
+// CHECK: Functions shown: 1
+// CHECK: Total functions: 3
+// CHECK: Maximum function count: 327
+// CHECK: Maximum internal block count: 297
+// CHECK: Statistics for indirect call sites profile:
+// CHECK:   Total number of sites: 3
+// CHECK:   Total number of sites with values: 3
+// CHECK:   Total number of profiled values: 3
+// CHECK:   Value sites histogram:
+// CHECK:         NumTargets, SiteCount
+// CHECK:         1, 3
+// CHECK: Statistics for memory intrinsic calls sizes profile:
+// CHECK:   Total number of sites: 3
+// CHECK:   Total number of sites with values: 3
+// CHECK:   Total number of profiled values: 3
+// CHECK:   Value sites histogram:
+// CHECK:         NumTargets, SiteCount
+// CHECK:         1, 3

diff  --git a/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c b/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c
new file mode 100644
index 000000000000..e0079c02b850
--- /dev/null
+++ b/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c
@@ -0,0 +1,10 @@
+// REQUIRES: lld-available
+
+// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=lld -ffunction-sections -fdata-sections -Wl,--gc-sections -z start-stop-gc
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c

diff  --git a/compiler-rt/test/profile/Linux/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/instrprof-value-merge.c
index 2619a1d00336..45eed474b4ab 100644
--- a/compiler-rt/test/profile/Linux/instrprof-value-merge.c
+++ b/compiler-rt/test/profile/Linux/instrprof-value-merge.c
@@ -1,79 +1,27 @@
-// RUN: %clang_pgogen -o %t -O3 %s
+// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c
 // RUN: rm -rf %t.profdir
 // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
 // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
 // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
 // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
 // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
-// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
 
-#include <string.h>
-
-void (*f0)();
-void (*f1)();
-void (*f2)();
-
-char dst[200];
-char src[200];
-volatile int n;
-
-__attribute__((noinline)) void foo() {}
-
-__attribute__((noinline)) void bar() {
-  f0 = foo;
-  f1 = foo;
-  f2 = foo;
-  n = 4;
-}
-int main(int argc, char *argv[]) {
-  int i;
-  bar();
-  if (argc == 1) {
-    f0();
-    for (i = 0; i < 9; i++)
-      f1();
-    for (i = 0; i < 99; i++)
-      f2();
-  } else {
-    memcpy((void *)dst, (void *)src, n);
-    for (i = 0; i < 6; i++)
-      memcpy((void *)(dst + 2), (void *)src, n + 1);
-    for (i = 0; i < 66; i++)
-      memcpy((void *)(dst + 9), (void *)src, n + 2);
-  }
-}
+/// -z start-stop-gc requires binutils 2.37.
+// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=bfd -ffunction-sections -fdata-sections -Wl,--gc-sections
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
 
-// CHECK: Counters:
-// CHECK:   main:
-// CHECK:     Hash: 0x0a9bd81e87ab6e87
-// CHECK:     Counters: 6
-// CHECK:     Indirect Call Site Count: 3
-// CHECK:     Number of Memory Intrinsics Calls: 3
-// CHECK:     Block counts: [27, 297, 12, 132, 3, 2]
-// CHECK:     Indirect Target Results:
-// CHECK:         [ 0, foo, 3 ]
-// CHECK:         [ 1, foo, 27 ]
-// CHECK:         [ 2, foo, 297 ]
-// CHECK:     Memory Intrinsic Size Results:
-// CHECK:         [ 0, 4, 2 ]
-// CHECK:         [ 1, 5, 12 ]
-// CHECK:         [ 2, 6, 132 ]
-// CHECK: Instrumentation level: IR
-// CHECK: Functions shown: 1
-// CHECK: Total functions: 3
-// CHECK: Maximum function count: 327
-// CHECK: Maximum internal block count: 297
-// CHECK: Statistics for indirect call sites profile:
-// CHECK:   Total number of sites: 3
-// CHECK:   Total number of sites with values: 3
-// CHECK:   Total number of profiled values: 3
-// CHECK:   Value sites histogram:
-// CHECK:         NumTargets, SiteCount
-// CHECK:         1, 3
-// CHECK: Statistics for memory intrinsic calls sizes profile:
-// CHECK:   Total number of sites: 3
-// CHECK:   Total number of sites with values: 3
-// CHECK:   Total number of profiled values: 3
-// CHECK:   Value sites histogram:
-// CHECK:         NumTargets, SiteCount
-// CHECK:         1, 3
+// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=gold -ffunction-sections -fdata-sections -Wl,--gc-sections
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c

diff  --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 5242211138f5..94b156f3b137 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -57,6 +57,7 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
     }
   };
   DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
+  std::vector<GlobalValue *> CompilerUsedVars;
   std::vector<GlobalValue *> UsedVars;
   std::vector<GlobalVariable *> ReferencedNames;
   GlobalVariable *NamesVar;

diff  --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index d73bb66ed003..a17d6f52d77d 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -539,6 +539,7 @@ bool InstrProfiling::run(
   NamesVar = nullptr;
   NamesSize = 0;
   ProfileDataMap.clear();
+  CompilerUsedVars.clear();
   UsedVars.clear();
   TT = Triple(M.getTargetTriple());
 
@@ -921,7 +922,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
   ProfileDataMap[NamePtr] = PD;
 
   // Mark the data variable as used so that it isn't stripped out.
-  UsedVars.push_back(Data);
+  CompilerUsedVars.push_back(Data);
   // Now that the linkage set by the FE has been passed to the data and counter
   // variables, reset Name variable's linkage and visibility to private so that
   // it can be removed later by the compiler.
@@ -976,6 +977,8 @@ void InstrProfiling::emitVNodes() {
       Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
   VNodesVar->setSection(
       getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
+  // VNodesVar is used by runtime but not referenced via relocation by other
+  // sections. Conservatively make it linker retained.
   UsedVars.push_back(VNodesVar);
 }
 
@@ -1004,6 +1007,8 @@ void InstrProfiling::emitNameData() {
   // linker from inserting padding before the start of the names section or
   // between names entries.
   NamesVar->setAlignment(Align(1));
+  // NamesVar is used by runtime but not referenced via relocation by other
+  // sections. Conservatively make it linker retained.
   UsedVars.push_back(NamesVar);
 
   for (auto *NamePtr : ReferencedNames)
@@ -1031,6 +1036,9 @@ void InstrProfiling::emitRegistration() {
                        getInstrProfRegFuncName(), M);
 
   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
+  for (Value *Data : CompilerUsedVars)
+    if (Data != NamesVar && !isa<Function>(Data))
+      IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
   for (Value *Data : UsedVars)
     if (Data != NamesVar && !isa<Function>(Data))
       IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
@@ -1081,7 +1089,7 @@ bool InstrProfiling::emitRuntimeHook() {
   IRB.CreateRet(Load);
 
   // Mark the user variable as used so that it isn't stripped out.
-  UsedVars.push_back(User);
+  CompilerUsedVars.push_back(User);
   return true;
 }
 
@@ -1094,9 +1102,14 @@ void InstrProfiling::emitUses() {
   // or discarded as a unit, so llvm.compiler.used is sufficient. Otherwise,
   // conservatively make all of them retained by the linker.
   if (TT.isOSBinFormatELF())
-    appendToCompilerUsed(*M, UsedVars);
+    appendToCompilerUsed(*M, CompilerUsedVars);
   else
-    appendToUsed(*M, UsedVars);
+    appendToUsed(*M, CompilerUsedVars);
+
+  // We do not add proper references from used metadata sections to NamesVar and
+  // VNodesVar, so we have to be conservative and place them in llvm.used
+  // regardless of the target.
+  appendToUsed(*M, UsedVars);
 }
 
 void InstrProfiling::emitInitialization() {

diff  --git a/llvm/test/Instrumentation/InstrProfiling/icall.ll b/llvm/test/Instrumentation/InstrProfiling/icall.ll
index 311770ae5707..bc7d6c90d0be 100644
--- a/llvm/test/Instrumentation/InstrProfiling/icall.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/icall.ll
@@ -50,6 +50,12 @@ attributes #0 = { nounwind }
 ; DYN-NOT: @__profvp_foo
 ; DYN-NOT: @__llvm_prf_vnodes
 
+;; __llvm_prf_vnodes and __llvm_prf_nm are not referenced by other metadata sections.
+;; We have to conservatively place them in llvm.used.
+; STATIC:      @llvm.used = appending global
+; STATIC-SAME:   @__llvm_prf_vnodes
+; STATIC-SAME:   @__llvm_prf_nm
+
 ; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
 ; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0)
 ; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0)


        


More information about the llvm-commits mailing list