[clang] [compiler-rt] [profile] Add `%b` `LLVM_PROFILE_FILE` option for binary ID (PR #123963)

Sinkevich Artem via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 22 08:24:59 PST 2025


https://github.com/ArtSin created https://github.com/llvm/llvm-project/pull/123963

Add support for expanding `%b` in `LLVM_PROFILE_FILE` to the binary ID (build ID). It can be used with `%m` to avoid its signature collisions.

This is supported on all platforms where writing binary IDs into profiles is implemented, as the `__llvm_write_binary_ids` function is used.

Fixes #51560.

>From cd11146d490985caae55a123fd34b6d3b3462077 Mon Sep 17 00:00:00 2001
From: ArtSin <artsin666 at gmail.com>
Date: Wed, 22 Jan 2025 20:20:56 +0400
Subject: [PATCH] [profile] Add `%b` `LLVM_PROFILE_FILE` option for binary ID

Add support for expanding `%b` in `LLVM_PROFILE_FILE` to the binary ID
(build ID). It can be used with `%m` to avoid its signature collisions.

This is supported on all platforms where writing binary IDs into profiles
is implemented, as the `__llvm_write_binary_ids` function is used.

Fixes #51560.
---
 clang/docs/SourceBasedCodeCoverage.rst        |  5 ++
 clang/docs/UsersManual.rst                    |  5 +-
 compiler-rt/lib/profile/InstrProfilingFile.c  | 57 +++++++++++++++++--
 .../test/profile/Linux/binary-id-path.c       | 37 ++++++++++++
 4 files changed, 97 insertions(+), 7 deletions(-)
 create mode 100644 compiler-rt/test/profile/Linux/binary-id-path.c

diff --git a/clang/docs/SourceBasedCodeCoverage.rst b/clang/docs/SourceBasedCodeCoverage.rst
index 73910e134a5891..49bce3b72b45aa 100644
--- a/clang/docs/SourceBasedCodeCoverage.rst
+++ b/clang/docs/SourceBasedCodeCoverage.rst
@@ -94,6 +94,11 @@ directory structure will be created.  Additionally, the following special
   not specified (i.e the pattern is "%m"), it's assumed that ``N = 1``. The
   merge pool specifier can only occur once per filename pattern.
 
+* "%b" expands out to the binary ID (build ID). It can be used with "%Nm" to
+  avoid binary signature collisions. To use it, the program should be compiled
+  with the build ID linker option (``--build-id`` for GNU ld or LLD). Linux,
+  Windows and AIX are supported.
+
 * "%c" expands out to nothing, but enables a mode in which profile counter
   updates are continuously synced to a file. This means that if the
   instrumented program crashes, or is killed by a signal, perfect coverage
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 260e84910c6f78..bc0ce3f3ccad07 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2880,7 +2880,8 @@ instrumentation:
    environment variable to specify an alternate file. If non-default file name
    is specified by both the environment variable and the command line option,
    the environment variable takes precedence. The file name pattern specified
-   can include different modifiers: ``%p``, ``%h``, ``%m``, ``%t``, and ``%c``.
+   can include different modifiers: ``%p``, ``%h``, ``%m``, ``%b``, ``%t``, and
+   ``%c``.
 
    Any instance of ``%p`` in that file name will be replaced by the process
    ID, so that you can easily distinguish the profile output from multiple
@@ -2917,7 +2918,7 @@ instrumentation:
      $ LLVM_PROFILE_FILE="code-%m.profraw" ./code
 
    See `this <SourceBasedCodeCoverage.html#running-the-instrumented-program>`_ section
-   about the ``%t``, and ``%c`` modifiers.
+   about the ``%b``, ``%t``, and ``%c`` modifiers.
 
 3. Combine profiles from multiple runs and convert the "raw" profile format to
    the input expected by clang. Use the ``merge`` command of the
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index bad4cc71801ec4..01fd314e577f76 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -77,6 +77,7 @@ typedef struct lprofFilename {
   char Hostname[COMPILER_RT_MAX_HOSTLEN];
   unsigned NumPids;
   unsigned NumHosts;
+  unsigned NumBinaryIds;
   /* When in-process merging is enabled, this parameter specifies
    * the total number of profile data files shared by all the processes
    * spawned from the same binary. By default the value is 1. If merging
@@ -88,8 +89,8 @@ typedef struct lprofFilename {
   ProfileNameSpecifier PNS;
 } lprofFilename;
 
-static lprofFilename lprofCurFilename = {0,   0, 0, {0}, NULL,
-                                         {0}, 0, 0, 0,   PNS_unknown};
+static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL,       {0},
+                                         0, 0, 0, 0,   PNS_unknown};
 
 static int ProfileMergeRequested = 0;
 static int getProfileFileSizeForMerging(FILE *ProfileFile,
@@ -790,7 +791,7 @@ static int checkBounds(int Idx, int Strlen) {
  * lprofcurFilename structure. */
 static int parseFilenamePattern(const char *FilenamePat,
                                 unsigned CopyFilenamePat) {
-  int NumPids = 0, NumHosts = 0, I;
+  int NumPids = 0, NumHosts = 0, NumBinaryIds = 0, I;
   char *PidChars = &lprofCurFilename.PidChars[0];
   char *Hostname = &lprofCurFilename.Hostname[0];
   int MergingEnabled = 0;
@@ -855,6 +856,15 @@ static int parseFilenamePattern(const char *FilenamePat,
                     FilenamePat);
           return -1;
         }
+      } else if (FilenamePat[I] == 'b') {
+        if (!NumBinaryIds++) {
+          if (__llvm_write_binary_ids(NULL) <= 0) {
+            PROF_WARN("Unable to get binary ID for filename pattern %s. Using "
+                      "the default name.",
+                      FilenamePat);
+            return -1;
+          }
+        }
       } else if (FilenamePat[I] == 'c') {
         if (__llvm_profile_is_continuous_mode_enabled()) {
           PROF_WARN("%%c specifier can only be specified once in %s.\n",
@@ -887,6 +897,7 @@ static int parseFilenamePattern(const char *FilenamePat,
 
   lprofCurFilename.NumPids = NumPids;
   lprofCurFilename.NumHosts = NumHosts;
+  lprofCurFilename.NumBinaryIds = NumBinaryIds;
   return 0;
 }
 
@@ -934,24 +945,53 @@ static void parseAndSetFilename(const char *FilenamePat,
  * filename with PID and hostname substitutions. */
 /* The length to hold uint64_t followed by 3 digits pool id including '_' */
 #define SIGLEN 24
+/* The length to hold 160-bit hash in hexadecimal form */
+#define BINARY_ID_LEN 40
 static int getCurFilenameLength(void) {
   int Len;
   if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
     return 0;
 
   if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
-        lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize))
+        lprofCurFilename.NumBinaryIds || lprofCurFilename.TmpDir ||
+        lprofCurFilename.MergePoolSize))
     return strlen(lprofCurFilename.FilenamePat);
 
   Len = strlen(lprofCurFilename.FilenamePat) +
         lprofCurFilename.NumPids * (strlen(lprofCurFilename.PidChars) - 2) +
         lprofCurFilename.NumHosts * (strlen(lprofCurFilename.Hostname) - 2) +
+        lprofCurFilename.NumBinaryIds * BINARY_ID_LEN +
         (lprofCurFilename.TmpDir ? (strlen(lprofCurFilename.TmpDir) - 1) : 0);
   if (lprofCurFilename.MergePoolSize)
     Len += SIGLEN;
   return Len;
 }
 
+typedef struct lprofBinaryIdsBuffer {
+  char String[BINARY_ID_LEN + 1];
+  int Length;
+} lprofBinaryIdsBuffer;
+
+/* Reads binary ID length and then its data, writes it into lprofBinaryIdsBuffer
+ * in hexadecimal form. */
+static uint32_t binaryIdsStringWriter(ProfDataWriter *This,
+                                      ProfDataIOVec *IOVecs,
+                                      uint32_t NumIOVecs) {
+  if (NumIOVecs < 2 || IOVecs[0].ElmSize != sizeof(uint64_t))
+    return -1;
+  uint64_t BinaryIdLen = *(const uint64_t *)IOVecs[0].Data;
+  if (IOVecs[1].ElmSize != sizeof(uint8_t) || IOVecs[1].NumElm != BinaryIdLen)
+    return -1;
+  const uint8_t *BinaryIdData = (const uint8_t *)IOVecs[1].Data;
+  lprofBinaryIdsBuffer *Data = (lprofBinaryIdsBuffer *)This->WriterCtx;
+  for (uint64_t I = 0; I < BinaryIdLen; I++) {
+    Data->Length +=
+        snprintf(Data->String + Data->Length, BINARY_ID_LEN + 1 - Data->Length,
+                 "%02hhx", BinaryIdData[I]);
+  }
+  return 0;
+}
+
 /* Return the pointer to the current profile file name (after substituting
  * PIDs and Hostnames in filename pattern. \p FilenameBuf is the buffer
  * to store the resulting filename. If no substitution is needed, the
@@ -965,7 +1005,8 @@ static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) {
     return 0;
 
   if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
-        lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize ||
+        lprofCurFilename.NumBinaryIds || lprofCurFilename.TmpDir ||
+        lprofCurFilename.MergePoolSize ||
         __llvm_profile_is_continuous_mode_enabled())) {
     if (!ForceUseBuf)
       return lprofCurFilename.FilenamePat;
@@ -992,6 +1033,12 @@ static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) {
         memcpy(FilenameBuf + J, lprofCurFilename.TmpDir, TmpDirLength);
         FilenameBuf[J + TmpDirLength] = DIR_SEPARATOR;
         J += TmpDirLength + 1;
+      } else if (FilenamePat[I] == 'b') {
+        lprofBinaryIdsBuffer Data = {{0}, 0};
+        ProfDataWriter Writer = {binaryIdsStringWriter, &Data};
+        __llvm_write_binary_ids(&Writer);
+        memcpy(FilenameBuf + J, Data.String, Data.Length);
+        J += Data.Length;
       } else {
         if (!getMergePoolSize(FilenamePat, &I))
           continue;
diff --git a/compiler-rt/test/profile/Linux/binary-id-path.c b/compiler-rt/test/profile/Linux/binary-id-path.c
new file mode 100644
index 00000000000000..7dce409d2e8f91
--- /dev/null
+++ b/compiler-rt/test/profile/Linux/binary-id-path.c
@@ -0,0 +1,37 @@
+// REQUIRES: linux
+// RUN: split-file %s %t.dir
+// RUN: %clang_profgen -Wl,--build-id=sha1 -o %t.dir/foo %t.dir/foo.c
+// RUN: %clang_profgen -Wl,--build-id=sha1 -o %t.dir/bar %t.dir/bar.c
+
+// Check that foo and bar have the same signatures.
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.profraw %run %t.dir/foo
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.profraw %run %t.dir/bar 2>&1 | FileCheck %s --check-prefix=MERGE-ERROR
+
+// Check that foo and bar have different binary IDs.
+// RUN: rm -rf %t.profdir %t.profdata
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/foo
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/bar
+// RUN: llvm-profdata merge -o %t.profdata %t.profdir
+// RUN: llvm-profdata show %t.profdata | FileCheck %s --check-prefix=BINARY-ID
+
+// Check fallback to the default name if binary ID is missing.
+// RUN: %clang_profgen -Wl,--build-id=none -o %t.dir/foo %t.dir/foo.c
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/foo 2>&1 | FileCheck %s --check-prefix=MISSING
+
+// MERGE-ERROR: LLVM Profile Error: Profile Merging of file {{.*}}.profraw failed: File exists
+
+// BINARY-ID: Instrumentation level: Front-end
+// BINARY-ID-NEXT: Total functions: 3
+// BINARY-ID-NEXT: Maximum function count: 2
+// BINARY-ID-NEXT: Maximum internal block count: 0
+
+// MISSING: Unable to get binary ID for filename pattern {{.*}}.profraw. Using the default name.
+
+//--- foo.c
+int main(void) { return 0; }
+void foo(void) {}
+
+//--- bar.c
+int main(void) { return 0; }
+void bar(int *a) { *a += 10; }



More information about the llvm-commits mailing list