[llvm] 04c3040 - [InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 1 13:43:29 PST 2021
Author: Fangrui Song
Date: 2021-03-01T13:43:23-08:00
New Revision: 04c3040f417683e7c31b3ee3381a3263106f48c5
URL: https://github.com/llvm/llvm-project/commit/04c3040f417683e7c31b3ee3381a3263106f48c5
DIFF: https://github.com/llvm/llvm-project/commit/04c3040f417683e7c31b3ee3381a3263106f48c5.diff
LOG: [InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF
`__llvm_prf_vnodes` and `__llvm_prf_names` are used by the runtime but are not
referenced via relocations in the translation unit.
With `-z start-stop-gc` (D96914 https://sourceware.org/bugzilla/show_bug.cgi?id=27451),
the linker no longer lets `__start_/__stop_` references retain them.
Place `__llvm_prf_vnodes` and `__llvm_prf_names` in `llvm.used` to make
them retained by the linker.
This patch changes most existing `UsedVars` cases to `CompilerUsedVars`
to reflect the ideal state - if the binary format properly supports
section based GC (dead stripping), `llvm.compiler.used` should be sufficient.
`__llvm_prf_vnodes` and `__llvm_prf_names` are switched to `UsedVars`
since we want them to be unconditionally retained by both compiler and linker.
Behaviors on the other binary formats (COFF/Mach-O) are not affected.
Differential Revision: https://reviews.llvm.org/D97649
Added:
compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c
compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c
Modified:
compiler-rt/test/profile/Linux/instrprof-value-merge.c
llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
llvm/test/Instrumentation/InstrProfiling/icall.ll
Removed:
################################################################################
diff --git a/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c
new file mode 100644
index 000000000000..a23bc484943e
--- /dev/null
+++ b/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c
@@ -0,0 +1,70 @@
+#include <string.h>
+
+void (*f0)();
+void (*f1)();
+void (*f2)();
+
+char dst[200];
+char src[200];
+volatile int n;
+
+__attribute__((noinline)) void foo() {}
+
+__attribute__((noinline)) void bar() {
+ f0 = foo;
+ f1 = foo;
+ f2 = foo;
+ n = 4;
+}
+int main(int argc, char *argv[]) {
+ int i;
+ bar();
+ if (argc == 1) {
+ f0();
+ for (i = 0; i < 9; i++)
+ f1();
+ for (i = 0; i < 99; i++)
+ f2();
+ } else {
+ memcpy((void *)dst, (void *)src, n);
+ for (i = 0; i < 6; i++)
+ memcpy((void *)(dst + 2), (void *)src, n + 1);
+ for (i = 0; i < 66; i++)
+ memcpy((void *)(dst + 9), (void *)src, n + 2);
+ }
+}
+
+// CHECK: Counters:
+// CHECK: main:
+// CHECK: Hash: 0x0a9bd81e87ab6e87
+// CHECK: Counters: 6
+// CHECK: Indirect Call Site Count: 3
+// CHECK: Number of Memory Intrinsics Calls: 3
+// CHECK: Block counts: [27, 297, 12, 132, 3, 2]
+// CHECK: Indirect Target Results:
+// CHECK: [ 0, foo, 3 ]
+// CHECK: [ 1, foo, 27 ]
+// CHECK: [ 2, foo, 297 ]
+// CHECK: Memory Intrinsic Size Results:
+// CHECK: [ 0, 4, 2 ]
+// CHECK: [ 1, 5, 12 ]
+// CHECK: [ 2, 6, 132 ]
+// CHECK: Instrumentation level: IR
+// CHECK: Functions shown: 1
+// CHECK: Total functions: 3
+// CHECK: Maximum function count: 327
+// CHECK: Maximum internal block count: 297
+// CHECK: Statistics for indirect call sites profile:
+// CHECK: Total number of sites: 3
+// CHECK: Total number of sites with values: 3
+// CHECK: Total number of profiled values: 3
+// CHECK: Value sites histogram:
+// CHECK: NumTargets, SiteCount
+// CHECK: 1, 3
+// CHECK: Statistics for memory intrinsic calls sizes profile:
+// CHECK: Total number of sites: 3
+// CHECK: Total number of sites with values: 3
+// CHECK: Total number of profiled values: 3
+// CHECK: Value sites histogram:
+// CHECK: NumTargets, SiteCount
+// CHECK: 1, 3
diff --git a/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c b/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c
new file mode 100644
index 000000000000..e0079c02b850
--- /dev/null
+++ b/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c
@@ -0,0 +1,10 @@
+// REQUIRES: lld-available
+
+// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=lld -ffunction-sections -fdata-sections -Wl,--gc-sections -z start-stop-gc
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
diff --git a/compiler-rt/test/profile/Linux/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/instrprof-value-merge.c
index 2619a1d00336..45eed474b4ab 100644
--- a/compiler-rt/test/profile/Linux/instrprof-value-merge.c
+++ b/compiler-rt/test/profile/Linux/instrprof-value-merge.c
@@ -1,79 +1,27 @@
-// RUN: %clang_pgogen -o %t -O3 %s
+// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c
// RUN: rm -rf %t.profdir
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
-// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
-#include <string.h>
-
-void (*f0)();
-void (*f1)();
-void (*f2)();
-
-char dst[200];
-char src[200];
-volatile int n;
-
-__attribute__((noinline)) void foo() {}
-
-__attribute__((noinline)) void bar() {
- f0 = foo;
- f1 = foo;
- f2 = foo;
- n = 4;
-}
-int main(int argc, char *argv[]) {
- int i;
- bar();
- if (argc == 1) {
- f0();
- for (i = 0; i < 9; i++)
- f1();
- for (i = 0; i < 99; i++)
- f2();
- } else {
- memcpy((void *)dst, (void *)src, n);
- for (i = 0; i < 6; i++)
- memcpy((void *)(dst + 2), (void *)src, n + 1);
- for (i = 0; i < 66; i++)
- memcpy((void *)(dst + 9), (void *)src, n + 2);
- }
-}
+/// -z start-stop-gc requires binutils 2.37.
+// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=bfd -ffunction-sections -fdata-sections -Wl,--gc-sections
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
-// CHECK: Counters:
-// CHECK: main:
-// CHECK: Hash: 0x0a9bd81e87ab6e87
-// CHECK: Counters: 6
-// CHECK: Indirect Call Site Count: 3
-// CHECK: Number of Memory Intrinsics Calls: 3
-// CHECK: Block counts: [27, 297, 12, 132, 3, 2]
-// CHECK: Indirect Target Results:
-// CHECK: [ 0, foo, 3 ]
-// CHECK: [ 1, foo, 27 ]
-// CHECK: [ 2, foo, 297 ]
-// CHECK: Memory Intrinsic Size Results:
-// CHECK: [ 0, 4, 2 ]
-// CHECK: [ 1, 5, 12 ]
-// CHECK: [ 2, 6, 132 ]
-// CHECK: Instrumentation level: IR
-// CHECK: Functions shown: 1
-// CHECK: Total functions: 3
-// CHECK: Maximum function count: 327
-// CHECK: Maximum internal block count: 297
-// CHECK: Statistics for indirect call sites profile:
-// CHECK: Total number of sites: 3
-// CHECK: Total number of sites with values: 3
-// CHECK: Total number of profiled values: 3
-// CHECK: Value sites histogram:
-// CHECK: NumTargets, SiteCount
-// CHECK: 1, 3
-// CHECK: Statistics for memory intrinsic calls sizes profile:
-// CHECK: Total number of sites: 3
-// CHECK: Total number of sites with values: 3
-// CHECK: Total number of profiled values: 3
-// CHECK: Value sites histogram:
-// CHECK: NumTargets, SiteCount
-// CHECK: 1, 3
+// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=gold -ffunction-sections -fdata-sections -Wl,--gc-sections
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 5242211138f5..94b156f3b137 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -57,6 +57,7 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
}
};
DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
+ std::vector<GlobalValue *> CompilerUsedVars;
std::vector<GlobalValue *> UsedVars;
std::vector<GlobalVariable *> ReferencedNames;
GlobalVariable *NamesVar;
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index d73bb66ed003..a17d6f52d77d 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -539,6 +539,7 @@ bool InstrProfiling::run(
NamesVar = nullptr;
NamesSize = 0;
ProfileDataMap.clear();
+ CompilerUsedVars.clear();
UsedVars.clear();
TT = Triple(M.getTargetTriple());
@@ -921,7 +922,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ProfileDataMap[NamePtr] = PD;
// Mark the data variable as used so that it isn't stripped out.
- UsedVars.push_back(Data);
+ CompilerUsedVars.push_back(Data);
// Now that the linkage set by the FE has been passed to the data and counter
// variables, reset Name variable's linkage and visibility to private so that
// it can be removed later by the compiler.
@@ -976,6 +977,8 @@ void InstrProfiling::emitVNodes() {
Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
VNodesVar->setSection(
getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
+ // VNodesVar is used by runtime but not referenced via relocation by other
+ // sections. Conservatively make it linker retained.
UsedVars.push_back(VNodesVar);
}
@@ -1004,6 +1007,8 @@ void InstrProfiling::emitNameData() {
// linker from inserting padding before the start of the names section or
// between names entries.
NamesVar->setAlignment(Align(1));
+ // NamesVar is used by runtime but not referenced via relocation by other
+ // sections. Conservatively make it linker retained.
UsedVars.push_back(NamesVar);
for (auto *NamePtr : ReferencedNames)
@@ -1031,6 +1036,9 @@ void InstrProfiling::emitRegistration() {
getInstrProfRegFuncName(), M);
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
+ for (Value *Data : CompilerUsedVars)
+ if (Data != NamesVar && !isa<Function>(Data))
+ IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
for (Value *Data : UsedVars)
if (Data != NamesVar && !isa<Function>(Data))
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
@@ -1081,7 +1089,7 @@ bool InstrProfiling::emitRuntimeHook() {
IRB.CreateRet(Load);
// Mark the user variable as used so that it isn't stripped out.
- UsedVars.push_back(User);
+ CompilerUsedVars.push_back(User);
return true;
}
@@ -1094,9 +1102,14 @@ void InstrProfiling::emitUses() {
// or discarded as a unit, so llvm.compiler.used is sufficient. Otherwise,
// conservatively make all of them retained by the linker.
if (TT.isOSBinFormatELF())
- appendToCompilerUsed(*M, UsedVars);
+ appendToCompilerUsed(*M, CompilerUsedVars);
else
- appendToUsed(*M, UsedVars);
+ appendToUsed(*M, CompilerUsedVars);
+
+ // We do not add proper references from used metadata sections to NamesVar and
+ // VNodesVar, so we have to be conservative and place them in llvm.used
+ // regardless of the target,
+ appendToUsed(*M, UsedVars);
}
void InstrProfiling::emitInitialization() {
diff --git a/llvm/test/Instrumentation/InstrProfiling/icall.ll b/llvm/test/Instrumentation/InstrProfiling/icall.ll
index 311770ae5707..bc7d6c90d0be 100644
--- a/llvm/test/Instrumentation/InstrProfiling/icall.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/icall.ll
@@ -50,6 +50,12 @@ attributes #0 = { nounwind }
; DYN-NOT: @__profvp_foo
; DYN-NOT: @__llvm_prf_vnodes
+;; __llvm_prf_vnodes and __llvm_prf_nm are not referenced by other metadata sections.
+;; We have to conservatively place them in llvm.used.
+; STATIC: @llvm.used = appending global
+; STATIC-SAME: @__llvm_prf_vnodes
+; STATIC-SAME: @__llvm_prf_nm
+
; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0)
; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0)
More information about the llvm-commits
mailing list