[compiler-rt] 667e1fa - [profile] Use base+vaddr for `__llvm_write_binary_ids` note pointers (#114907)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 21 10:14:32 PST 2024


Author: Josh Stone
Date: 2024-11-21T10:14:29-08:00
New Revision: 667e1fadcf4376ce41f5cae7cabab9f5ccc77b15

URL: https://github.com/llvm/llvm-project/commit/667e1fadcf4376ce41f5cae7cabab9f5ccc77b15
DIFF: https://github.com/llvm/llvm-project/commit/667e1fadcf4376ce41f5cae7cabab9f5ccc77b15.diff

LOG: [profile] Use base+vaddr for `__llvm_write_binary_ids` note pointers (#114907)

This function always examines its own ELF headers in memory, but it
was conditionally switching between file-offset and in-memory addressing,
and it wasn't accounting for LOAD offsets at runtime. This is especially
bad if a loaded segment has additional padding that's not in the file
offsets.

Now we do a first scan of the program headers to figure out the runtime
base address based on `PT_PHDR` and/or `PT_DYNAMIC` (else assume zero),
similar to libc's `do_start`. Then each `PT_NOTE` pointer is simply the
base plus the segment's `p_vaddr`, which includes LOAD offsets.

Fixes #114605

Added: 
    compiler-rt/test/profile/Linux/binary-id-offset.c

Modified: 
    compiler-rt/lib/profile/InstrProfilingPlatformLinux.c

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 613cfb60857cf3..5b230c1b200623 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -194,41 +194,33 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
  */
 COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
   extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
+  extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden")));
+
   const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
   const ElfW(Phdr) *ProgramHeader =
       (const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
 
+  /* Compute the added base address in case of position-independent code. */
+  uintptr_t Base = 0;
+  for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
+    if (ProgramHeader[I].p_type == PT_PHDR)
+      Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr;
+    if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC)
+      Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr;
+  }
+
   int TotalBinaryIdsSize = 0;
-  uint32_t I;
   /* Iterate through entries in the program header. */
-  for (I = 0; I < ElfHeader->e_phnum; I++) {
+  for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
     /* Look for the notes segment in program header entries. */
     if (ProgramHeader[I].p_type != PT_NOTE)
       continue;
 
     /* There can be multiple notes segment, and examine each of them. */
-    const ElfW(Nhdr) * Note;
-    const ElfW(Nhdr) * NotesEnd;
-    /*
-     * When examining notes in file, use p_offset, which is the offset within
-     * the elf file, to find the start of notes.
-     */
-    if (ProgramHeader[I].p_memsz == 0 ||
-        ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) {
-      Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader +
-                                  ProgramHeader[I].p_offset);
-      NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) +
-                                      ProgramHeader[I].p_filesz);
-    } else {
-      /*
-       * When examining notes in memory, use p_vaddr, which is the address of
-       * section after loaded to memory, to find the start of notes.
-       */
-      Note =
-          (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr);
-      NotesEnd =
-          (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
-    }
+    const ElfW(Nhdr) *Note =
+        (const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr);
+    const ElfW(Nhdr) *NotesEnd =
+        (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
 
     int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd);
     if (TotalBinaryIdsSize == -1)

diff  --git a/compiler-rt/test/profile/Linux/binary-id-offset.c b/compiler-rt/test/profile/Linux/binary-id-offset.c
new file mode 100644
index 00000000000000..c66fe82d714ce9
--- /dev/null
+++ b/compiler-rt/test/profile/Linux/binary-id-offset.c
@@ -0,0 +1,33 @@
+// REQUIRES: linux
+//
+// Make sure the build-id can be found in both EXEC and DYN (PIE) files,
+// even when the note's section-start is forced to a weird address.
+// (The DYN case would also apply to libraries, not explicitly tested here.)
+
+// DEFINE: %{cflags} =
+// DEFINE: %{check} = (                                             \
+// DEFINE:     %clang_profgen -Wl,--build-id -o %t %s %{cflags}  && \
+// DEFINE:     env LLVM_PROFILE_FILE=%t.profraw %run %t          && \
+// DEFINE:     llvm-readelf --notes %t                           && \
+// DEFINE:     llvm-profdata show --binary-ids %t.profraw           \
+// DEFINE:   ) | FileCheck %s
+
+// REDEFINE: %{cflags} = -no-pie
+// RUN: %{check}
+
+// REDEFINE: %{cflags} = -pie -fPIE
+// RUN: %{check}
+
+// REDEFINE: %{cflags} = -no-pie -Wl,--section-start=.note.gnu.build-id=0x1000000
+// RUN: %{check}
+
+// REDEFINE: %{cflags} = -pie -fPIE -Wl,--section-start=.note.gnu.build-id=0x1000000
+// RUN: %{check}
+
+// CHECK-LABEL{LITERAL}: .note.gnu.build-id
+// CHECK: Build ID: [[ID:[0-9a-f]+]]
+
+// CHECK-LABEL{LITERAL}: Binary IDs:
+// CHECK-NEXT: [[ID]]
+
+int main() { return 0; }


        


More information about the llvm-commits mailing list