[clang] [compiler-rt] [profile] Add `%b` `LLVM_PROFILE_FILE` option for binary ID (PR #123963)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 08:25:52 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Sinkevich Artem (ArtSin)
<details>
<summary>Changes</summary>
Add support for expanding `%b` in `LLVM_PROFILE_FILE` to the binary ID (build ID). It can be used with `%m` to avoid its signature collisions.
This is supported on all platforms where writing binary IDs into profiles is implemented, as the `__llvm_write_binary_ids` function is used.
Fixes #<!-- -->51560.
---
Full diff: https://github.com/llvm/llvm-project/pull/123963.diff
4 Files Affected:
- (modified) clang/docs/SourceBasedCodeCoverage.rst (+5)
- (modified) clang/docs/UsersManual.rst (+3-2)
- (modified) compiler-rt/lib/profile/InstrProfilingFile.c (+52-5)
- (added) compiler-rt/test/profile/Linux/binary-id-path.c (+37)
``````````diff
diff --git a/clang/docs/SourceBasedCodeCoverage.rst b/clang/docs/SourceBasedCodeCoverage.rst
index 73910e134a5891..49bce3b72b45aa 100644
--- a/clang/docs/SourceBasedCodeCoverage.rst
+++ b/clang/docs/SourceBasedCodeCoverage.rst
@@ -94,6 +94,11 @@ directory structure will be created. Additionally, the following special
not specified (i.e the pattern is "%m"), it's assumed that ``N = 1``. The
merge pool specifier can only occur once per filename pattern.
+* "%b" expands out to the binary ID (build ID). It can be used with "%Nm" to
+ avoid binary signature collisions. To use it, the program should be compiled
+ with the build ID linker option (``--build-id`` for GNU ld or LLD). Linux,
+ Windows and AIX are supported.
+
* "%c" expands out to nothing, but enables a mode in which profile counter
updates are continuously synced to a file. This means that if the
instrumented program crashes, or is killed by a signal, perfect coverage
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 260e84910c6f78..bc0ce3f3ccad07 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2880,7 +2880,8 @@ instrumentation:
environment variable to specify an alternate file. If non-default file name
is specified by both the environment variable and the command line option,
the environment variable takes precedence. The file name pattern specified
- can include different modifiers: ``%p``, ``%h``, ``%m``, ``%t``, and ``%c``.
+ can include different modifiers: ``%p``, ``%h``, ``%m``, ``%b``, ``%t``, and
+ ``%c``.
Any instance of ``%p`` in that file name will be replaced by the process
ID, so that you can easily distinguish the profile output from multiple
@@ -2917,7 +2918,7 @@ instrumentation:
$ LLVM_PROFILE_FILE="code-%m.profraw" ./code
See `this <SourceBasedCodeCoverage.html#running-the-instrumented-program>`_ section
- about the ``%t``, and ``%c`` modifiers.
+ about the ``%b``, ``%t``, and ``%c`` modifiers.
3. Combine profiles from multiple runs and convert the "raw" profile format to
the input expected by clang. Use the ``merge`` command of the
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index bad4cc71801ec4..01fd314e577f76 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -77,6 +77,7 @@ typedef struct lprofFilename {
char Hostname[COMPILER_RT_MAX_HOSTLEN];
unsigned NumPids;
unsigned NumHosts;
+ unsigned NumBinaryIds;
/* When in-process merging is enabled, this parameter specifies
* the total number of profile data files shared by all the processes
* spawned from the same binary. By default the value is 1. If merging
@@ -88,8 +89,8 @@ typedef struct lprofFilename {
ProfileNameSpecifier PNS;
} lprofFilename;
-static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL,
- {0}, 0, 0, 0, PNS_unknown};
+static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL, {0},
+ 0, 0, 0, 0, PNS_unknown};
static int ProfileMergeRequested = 0;
static int getProfileFileSizeForMerging(FILE *ProfileFile,
@@ -790,7 +791,7 @@ static int checkBounds(int Idx, int Strlen) {
* lprofcurFilename structure. */
static int parseFilenamePattern(const char *FilenamePat,
unsigned CopyFilenamePat) {
- int NumPids = 0, NumHosts = 0, I;
+ int NumPids = 0, NumHosts = 0, NumBinaryIds = 0, I;
char *PidChars = &lprofCurFilename.PidChars[0];
char *Hostname = &lprofCurFilename.Hostname[0];
int MergingEnabled = 0;
@@ -855,6 +856,15 @@ static int parseFilenamePattern(const char *FilenamePat,
FilenamePat);
return -1;
}
+ } else if (FilenamePat[I] == 'b') {
+ if (!NumBinaryIds++) {
+ if (__llvm_write_binary_ids(NULL) <= 0) {
+ PROF_WARN("Unable to get binary ID for filename pattern %s. Using "
+ "the default name.",
+ FilenamePat);
+ return -1;
+ }
+ }
} else if (FilenamePat[I] == 'c') {
if (__llvm_profile_is_continuous_mode_enabled()) {
PROF_WARN("%%c specifier can only be specified once in %s.\n",
@@ -887,6 +897,7 @@ static int parseFilenamePattern(const char *FilenamePat,
lprofCurFilename.NumPids = NumPids;
lprofCurFilename.NumHosts = NumHosts;
+ lprofCurFilename.NumBinaryIds = NumBinaryIds;
return 0;
}
@@ -934,24 +945,53 @@ static void parseAndSetFilename(const char *FilenamePat,
* filename with PID and hostname substitutions. */
/* The length to hold uint64_t followed by 3 digits pool id including '_' */
#define SIGLEN 24
+/* The length to hold 160-bit hash in hexadecimal form */
+#define BINARY_ID_LEN 40
static int getCurFilenameLength(void) {
int Len;
if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
return 0;
if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
- lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize))
+ lprofCurFilename.NumBinaryIds || lprofCurFilename.TmpDir ||
+ lprofCurFilename.MergePoolSize))
return strlen(lprofCurFilename.FilenamePat);
Len = strlen(lprofCurFilename.FilenamePat) +
lprofCurFilename.NumPids * (strlen(lprofCurFilename.PidChars) - 2) +
lprofCurFilename.NumHosts * (strlen(lprofCurFilename.Hostname) - 2) +
+ lprofCurFilename.NumBinaryIds * BINARY_ID_LEN +
(lprofCurFilename.TmpDir ? (strlen(lprofCurFilename.TmpDir) - 1) : 0);
if (lprofCurFilename.MergePoolSize)
Len += SIGLEN;
return Len;
}
+typedef struct lprofBinaryIdsBuffer {
+ char String[BINARY_ID_LEN + 1];
+ int Length;
+} lprofBinaryIdsBuffer;
+
+/* Reads binary ID length and then its data, writes it into lprofBinaryIdsBuffer
+ * in hexadecimal form. */
+static uint32_t binaryIdsStringWriter(ProfDataWriter *This,
+ ProfDataIOVec *IOVecs,
+ uint32_t NumIOVecs) {
+ if (NumIOVecs < 2 || IOVecs[0].ElmSize != sizeof(uint64_t))
+ return -1;
+ uint64_t BinaryIdLen = *(const uint64_t *)IOVecs[0].Data;
+ if (IOVecs[1].ElmSize != sizeof(uint8_t) || IOVecs[1].NumElm != BinaryIdLen)
+ return -1;
+ const uint8_t *BinaryIdData = (const uint8_t *)IOVecs[1].Data;
+ lprofBinaryIdsBuffer *Data = (lprofBinaryIdsBuffer *)This->WriterCtx;
+ for (uint64_t I = 0; I < BinaryIdLen; I++) {
+ Data->Length +=
+ snprintf(Data->String + Data->Length, BINARY_ID_LEN + 1 - Data->Length,
+ "%02hhx", BinaryIdData[I]);
+ }
+ return 0;
+}
+
/* Return the pointer to the current profile file name (after substituting
* PIDs and Hostnames in filename pattern. \p FilenameBuf is the buffer
* to store the resulting filename. If no substitution is needed, the
@@ -965,7 +1005,8 @@ static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) {
return 0;
if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
- lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize ||
+ lprofCurFilename.NumBinaryIds || lprofCurFilename.TmpDir ||
+ lprofCurFilename.MergePoolSize ||
__llvm_profile_is_continuous_mode_enabled())) {
if (!ForceUseBuf)
return lprofCurFilename.FilenamePat;
@@ -992,6 +1033,12 @@ static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) {
memcpy(FilenameBuf + J, lprofCurFilename.TmpDir, TmpDirLength);
FilenameBuf[J + TmpDirLength] = DIR_SEPARATOR;
J += TmpDirLength + 1;
+ } else if (FilenamePat[I] == 'b') {
+ lprofBinaryIdsBuffer Data = {{0}, 0};
+ ProfDataWriter Writer = {binaryIdsStringWriter, &Data};
+ __llvm_write_binary_ids(&Writer);
+ memcpy(FilenameBuf + J, Data.String, Data.Length);
+ J += Data.Length;
} else {
if (!getMergePoolSize(FilenamePat, &I))
continue;
diff --git a/compiler-rt/test/profile/Linux/binary-id-path.c b/compiler-rt/test/profile/Linux/binary-id-path.c
new file mode 100644
index 00000000000000..7dce409d2e8f91
--- /dev/null
+++ b/compiler-rt/test/profile/Linux/binary-id-path.c
@@ -0,0 +1,37 @@
+// REQUIRES: linux
+// RUN: split-file %s %t.dir
+// RUN: %clang_profgen -Wl,--build-id=sha1 -o %t.dir/foo %t.dir/foo.c
+// RUN: %clang_profgen -Wl,--build-id=sha1 -o %t.dir/bar %t.dir/bar.c
+
+// Check that foo and bar have the same signatures.
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.profraw %run %t.dir/foo
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.profraw %run %t.dir/bar 2>&1 | FileCheck %s --check-prefix=MERGE-ERROR
+
+// Check that foo and bar have different binary IDs.
+// RUN: rm -rf %t.profdir %t.profdata
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/foo
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/bar
+// RUN: llvm-profdata merge -o %t.profdata %t.profdir
+// RUN: llvm-profdata show %t.profdata | FileCheck %s --check-prefix=BINARY-ID
+
+// Check fallback to the default name if binary ID is missing.
+// RUN: %clang_profgen -Wl,--build-id=none -o %t.dir/foo %t.dir/foo.c
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/foo 2>&1 | FileCheck %s --check-prefix=MISSING
+
+// MERGE-ERROR: LLVM Profile Error: Profile Merging of file {{.*}}.profraw failed: File exists
+
+// BINARY-ID: Instrumentation level: Front-end
+// BINARY-ID-NEXT: Total functions: 3
+// BINARY-ID-NEXT: Maximum function count: 2
+// BINARY-ID-NEXT: Maximum internal block count: 0
+
+// MISSING: Unable to get binary ID for filename pattern {{.*}}.profraw. Using the default name.
+
+//--- foo.c
+int main(void) { return 0; }
+void foo(void) {}
+
+//--- bar.c
+int main(void) { return 0; }
+void bar(int *a) { *a += 10; }
``````````
</details>
https://github.com/llvm/llvm-project/pull/123963
More information about the llvm-commits
mailing list