[compiler-rt] [llvm] Introduce XRay object mapping file to enable symbolization for DSOs (PR #133269)

Sebastian Kreutzer via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 27 08:56:11 PDT 2025


https://github.com/sebastiankreutzer created https://github.com/llvm/llvm-project/pull/133269

Prototype implementation for DSO function resolution for XRay. 
- Adds the `__xray_object_path` API function to look up the binary path of a loaded DSO
- At the end of tracing, writes out a YAML file, containing the mapping of object IDs to binaries
- Extends `llvm-xray extract` to allow generating a combined instrumentation map file, given the mapping YAML as input

>From bc0ae8d83cc0a297b26b4dcb64814347a6b15d8b Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Thu, 27 Mar 2025 16:42:20 +0100
Subject: [PATCH] [XRay] Prototype implementation of XRay object mapping file
 to enable symbolization for DSOs

---
 compiler-rt/include/xray/xray_interface.h     |   6 +
 compiler-rt/lib/xray/xray_basic_logging.cpp   |  40 ++++-
 compiler-rt/lib/xray/xray_init.cpp            |  63 +++++++
 compiler-rt/lib/xray/xray_interface.cpp       |  35 ++++
 .../lib/xray/xray_interface_internal.h        |   3 +
 compiler-rt/lib/xray/xray_utils.cpp           |   8 +-
 compiler-rt/lib/xray/xray_utils.h             |   7 +-
 llvm/tools/llvm-xray/xray-extract.cpp         | 158 +++++++++++++++---
 8 files changed, 287 insertions(+), 33 deletions(-)

diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
index 675ea0cbc48c8..f8b07df688989 100644
--- a/compiler-rt/include/xray/xray_interface.h
+++ b/compiler-rt/include/xray/xray_interface.h
@@ -165,6 +165,12 @@ extern int32_t __xray_unpack_object_id(int32_t PackedId);
 /// high bits are truncated.
 extern int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId);
 
+/// Returns the path from which the given object (executable or DSO) was
+/// loaded, or nullptr if ObjId is out of range.
+/// The returned string is owned by the XRay runtime and remains valid until the
+/// end of execution.
+extern const char* __xray_object_path(int32_t ObjId);
+
 /// Initialize the required XRay data structures. This is useful in cases where
 /// users want to control precisely when the XRay instrumentation data
 /// structures are initialized, for example when the XRay library is built with
diff --git a/compiler-rt/lib/xray/xray_basic_logging.cpp b/compiler-rt/lib/xray/xray_basic_logging.cpp
index 6ac5417bef754..c11b9bd2d748e 100644
--- a/compiler-rt/lib/xray/xray_basic_logging.cpp
+++ b/compiler-rt/lib/xray/xray_basic_logging.cpp
@@ -439,15 +439,42 @@ XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize,
   return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
 }
 
+// Writes the object-ID-to-path mapping to "<log filename>.map.yaml".
+static void writeObjectMapping() XRAY_NEVER_INSTRUMENT {
+  LogWriter *LW = getGlobalLog();
+  if (!LW) {
+    Report("Log file was not initialized!\n");
+    return;
+  }
+
+  if (Verbosity())
+    Report("Writing object mapping file.\n");
+
+  char MapFilename[256] = {};
+  int NeededLength = internal_snprintf(
+      MapFilename, sizeof(MapFilename), "%s.map.yaml", LW->GetFilename());
+  // The returned length excludes the terminating NUL, so a value equal to the
+  // buffer size already means the name was truncated.
+  if (NeededLength >= int(sizeof(MapFilename))) {
+    Report("XRay map file name too long (%d): %s\n", NeededLength, MapFilename);
+    return;
+  }
+
+  if (!__xray_write_object_mapping(MapFilename))
+    Report("Failed to write the XRay object mapping to %s\n", MapFilename);
+}
+
 XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
-  uint8_t Expected = 0;
+  uint8_t Expected = 1; // Initialized; NOTE(review): confirm init stores 1.
   if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
-                                      memory_order_acq_rel) &&
-      Verbosity())
-    Report("Basic logging already finalized.\n");
+                                      memory_order_acq_rel)) {
+    if (Verbosity())
+      Report("Basic logging already finalized.\n");
+    return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+  }
 
-  // Nothing really to do aside from marking state of the global to be
-  // uninitialized.
+  // Write the object mapping file and mark state as finalized.
+  writeObjectMapping();
 
   return XRayLogInitStatus::XRAY_LOG_FINALIZED;
 }
@@ -462,6 +489,7 @@ XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
 void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
 }
 
+
 bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
   XRayLogImpl Impl{
       basicLoggingInit,
@@ -504,7 +532,7 @@ bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
     pthread_once(&DynamicOnce, +[] {
       static void *FakeTLD = nullptr;
       FakeTLD = &getThreadLocalData();
-      Atexit(+[] { TLDDestructor(FakeTLD); });
+      Atexit(+[] { TLDDestructor(FakeTLD); writeObjectMapping(); });
     });
   }
   return true;
diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index 020bfe52b5320..abc24120e4ac6 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -12,9 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include <fcntl.h>
+#include <string.h>
 #include <strings.h>
 #include <unistd.h>
 
+#ifdef __ELF__
+#include <link.h>
+#endif
+
 #include "sanitizer_common/sanitizer_common.h"
 #include "xray/xray_interface.h"
 #include "xray_allocator.h"
@@ -62,6 +67,56 @@ atomic_uint8_t XRayFlagsInitialized{0};
 // A mutex to allow only one thread to initialize the XRay data structures.
 SpinMutex XRayInitMutex;
 
+
+namespace {
+
+struct DlIteratePhdrData {
+  intptr_t TargetSledAddr{0}; // Address to look up (input).
+  const char* Path{nullptr};  // dlpi_name of the matching object (output).
+  bool Found{false};          // True once a containing segment was found.
+};
+
+const char* DetectObjPath(bool IsDSO, uint64_t Addr) XRAY_NEVER_INSTRUMENT {
+  // The main executable is resolved through the built-in sanitizer helpers.
+  if (!IsDSO) {
+    __sanitizer::UpdateProcessName();
+    // TODO: Is this the full path?
+    return __sanitizer::GetProcessName();
+  }
+
+  // DSO paths can only be detected on ELF targets; other targets fall
+  // through to the nullptr return below.
+#ifdef __ELF__
+  // Scan the loaded objects for the one with a PT_LOAD segment that contains
+  // the given address.
+  DlIteratePhdrData Query;
+  Query.TargetSledAddr = Addr;
+  auto Visit = [](dl_phdr_info *Info, size_t, void *Arg) -> int {
+    auto *Q = (DlIteratePhdrData *)Arg;
+    Q->Found = false;
+    for (int Idx = 0; Idx < Info->dlpi_phnum; Idx++) {
+      const auto &Segment = Info->dlpi_phdr[Idx];
+      if (Segment.p_type != PT_LOAD)
+        continue;
+      intptr_t Lo = Info->dlpi_addr + Segment.p_vaddr;
+      intptr_t Hi = Lo + Segment.p_memsz;
+
+      if (Q->TargetSledAddr < Lo || Q->TargetSledAddr >= Hi)
+        continue;
+      Q->Path = Info->dlpi_name;
+      Q->Found = true;
+      return 1; // Non-zero stops the iteration.
+    }
+    return 0;
+  };
+  dl_iterate_phdr(Visit, &Query);
+  if (Query.Found)
+    return Query.Path;
+#endif
+  return nullptr;
+}
+}
+
 // Registers XRay sleds and trampolines coming from the main executable or one
 // of the linked DSOs.
 // Returns the object ID if registration is successful, -1 otherwise.
@@ -107,6 +162,14 @@ __xray_register_sleds(const XRaySledEntry *SledsBegin,
   if (Verbosity())
     Report("Registering %d new functions!\n", SledMap.Functions);
 
+  const char* Path = DetectObjPath(FromDSO, SledsBegin->address());
+  if (Path) {
+    // Copy needed because DSO path is freed when unloaded.
+    SledMap.Path = internal_strdup(Path);
+  } else {
+    Report("Unable to determine load path for address %x\n", SledsBegin->address());
+  }
+
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
     auto Idx = atomic_fetch_add(&XRayNumObjects, 1, memory_order_acq_rel);
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 3f97827874a70..7605ce743f1e6 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -688,3 +688,38 @@ int32_t __xray_unpack_object_id(int32_t PackedId) {
 int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId) {
   return __xray::MakePackedId(FuncId, ObjId);
 }
+
+const char* __xray_object_path(int32_t ObjId) {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  const auto Count = atomic_load(&XRayNumObjects, memory_order_acquire);
+  // Reject negative IDs and IDs at or beyond the number of registered objects.
+  const bool InRange = ObjId >= 0 && static_cast<uint32_t>(ObjId) < Count;
+  return InRange ? XRayInstrMaps[ObjId].Path : nullptr;
+}
+
+#include <stdio.h>
+
+// Writes the object mapping as YAML to outfile; returns false on I/O failure.
+bool __xray_write_object_mapping(const char* outfile) XRAY_NEVER_INSTRUMENT {
+  // TODO: Use low-level IO API?
+  FILE *of = fopen(outfile, "w");
+  if (!of) {
+    Report("Unable to write object mapping to file %s\n", outfile);
+    return false;
+  }
+
+  fprintf(of, "--- !XRayMapping\n");
+  fprintf(of, "num_object_bits: %d\n", __xray::XRayNObjBits);
+  fprintf(of, "objects:\n");
+  int NumObjects = __xray_num_objects();
+  for (int ObjId = 0; ObjId < NumObjects; ObjId++) {
+    auto* Path = __xray_object_path(ObjId);
+    if (!Path)
+      Report("Unknown path for object %d. The XRay mapping file may be "
+             "incomplete.\n", ObjId);
+    fprintf(of, "  - id: %d\n", ObjId);
+    fprintf(of, "    path: \"%s\"\n", Path ? Path : "");
+  }
+  fprintf(of, "...\n");
+  return fclose(of) == 0; // Buffered write errors surface on close.
+}
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index 5dcccfe825cf5..5d3e4b435b25e 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -106,6 +106,8 @@ extern int32_t __xray_register_dso(const XRaySledEntry *SledsBegin,
                                    XRayTrampolines Trampolines);
 
 extern bool __xray_deregister_dso(int32_t ObjId);
+
+extern bool __xray_write_object_mapping(const char* outfile);
 }
 
 namespace __xray {
@@ -137,6 +139,7 @@ struct XRaySledMap {
   XRayTrampolines Trampolines;
   bool FromDSO;
   bool Loaded;
+  const char* Path;
 };
 
 bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
diff --git a/compiler-rt/lib/xray/xray_utils.cpp b/compiler-rt/lib/xray/xray_utils.cpp
index 5d51df9937c2c..7563bc18a2db2 100644
--- a/compiler-rt/lib/xray/xray_utils.cpp
+++ b/compiler-rt/lib/xray/xray_utils.cpp
@@ -124,7 +124,7 @@ LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT {
   Report("XRay: " FORMAT_DUMPFILE "\n", ProfileSinkName, VmoName);
 
   LogWriter *LW = reinterpret_cast<LogWriter *>(InternalAlloc(sizeof(LogWriter)));
-  new (LW) LogWriter(Vmo);
+  new (LW) LogWriter(Vmo, VmoName);
   return LW;
 }
 
@@ -155,6 +155,10 @@ void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUME
   }
 }
 
+const char* LogWriter::GetFilename() XRAY_NEVER_INSTRUMENT {
+  return Filename; // Copied in by the non-Fuchsia constructor.
+}
+
 void LogWriter::Flush() XRAY_NEVER_INSTRUMENT {
   fsync(Fd);
 }
@@ -187,7 +191,7 @@ LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT {
     Report("XRay: Log file in '%s'\n", TmpFilename);
 
   LogWriter *LW = allocate<LogWriter>();
-  new (LW) LogWriter(Fd);
+  new (LW) LogWriter(Fd, TmpFilename);
   return LW;
 }
 
diff --git a/compiler-rt/lib/xray/xray_utils.h b/compiler-rt/lib/xray/xray_utils.h
index 5dc73d7fa8cde..309e4d8be8e33 100644
--- a/compiler-rt/lib/xray/xray_utils.h
+++ b/compiler-rt/lib/xray/xray_utils.h
@@ -31,13 +31,17 @@ class LogWriter {
 #if SANITIZER_FUCHSIA
  LogWriter(zx_handle_t Vmo) : Vmo(Vmo) {}
 #else
-  explicit LogWriter(int Fd) : Fd(Fd) {}
+  explicit LogWriter(int Fd, const char* Filename) : Fd(Fd) {
+    internal_strlcpy(this->Filename, Filename, sizeof(this->Filename));
+  }
 #endif
  ~LogWriter();
 
  // Write a character range into a log.
  void WriteAll(const char *Begin, const char *End);
 
+ const char* GetFilename();
+
  void Flush();
 
  // Returns a new log instance initialized using the flag-provided values.
@@ -52,6 +56,7 @@ class LogWriter {
 #else
  int Fd = -1;
 #endif
+ char Filename[256];
 };
 
 constexpr size_t gcd(size_t a, size_t b) {
diff --git a/llvm/tools/llvm-xray/xray-extract.cpp b/llvm/tools/llvm-xray/xray-extract.cpp
index 52767a00f6152..394c4f70516f8 100644
--- a/llvm/tools/llvm-xray/xray-extract.cpp
+++ b/llvm/tools/llvm-xray/xray-extract.cpp
@@ -21,8 +21,11 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
 #include "llvm/XRay/InstrumentationMap.h"
 
+#include <assert.h>
+
 using namespace llvm;
 using namespace llvm::xray;
 using namespace llvm::yaml;
@@ -51,51 +54,158 @@ static cl::opt<bool> Demangle("demangle",
 static cl::opt<bool> NoDemangle("no-demangle",
                                 cl::desc("don't demangle symbols"),
                                 cl::sub(Extract));
+static cl::opt<bool>
+    FromMapping("mapping", cl::init(false), cl::sub(Extract),
+                cl::desc("Create instrumentation map from object map YAML"));
 
 namespace {
 
-void exportAsYAML(const InstrumentationMap &Map, raw_ostream &OS,
-                  FuncIdConversionHelper &FH) {
-  // First we translate the sleds into the YAMLXRaySledEntry objects in a deque.
-  std::vector<YAMLXRaySledEntry> YAMLSleds;
+struct YAMLXRayObjectMapEntry {
+  int32_t ObjId;    // Value of the "id" key.
+  std::string Path; // Value of the "path" key.
+};
+
+struct YAMLXRayObjectMapping {
+  int NumObjBits;                              // "num_object_bits" key.
+  std::vector<YAMLXRayObjectMapEntry> Objects; // "objects" sequence.
+};
+
+}
+
+namespace llvm{
+namespace yaml {
+template <> struct MappingTraits<YAMLXRayObjectMapEntry> {
+  static void mapping(IO &IO, YAMLXRayObjectMapEntry &Entry) {
+    IO.mapRequired("id", Entry.ObjId);  // Object ID of the entry.
+    IO.mapRequired("path", Entry.Path); // Path of the object's binary.
+  }
+};
+
+template <> struct MappingTraits<YAMLXRayObjectMapping> {
+  static void mapping(IO &IO, YAMLXRayObjectMapping &Mapping) {
+    IO.mapRequired("num_object_bits", Mapping.NumObjBits); // Object ID width.
+    IO.mapRequired("objects", Mapping.Objects); // One entry per object.
+  }
+};
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLXRayObjectMapEntry)
+
+namespace {
+
+// Parses the object mapping YAML document at Filename into Mapping.
+// Returns an error if the file cannot be read or is not a valid mapping.
+Error ReadObjectMappingYAML(StringRef Filename, YAMLXRayObjectMapping& Mapping) {
+  auto FileBufferOrErr = llvm::MemoryBuffer::getFile(Filename);
+  if (!FileBufferOrErr)
+    return joinErrors(make_error<StringError>(
+                          Twine("Cannot read object mapping YAML from '") +
+                              Filename + "'.",
+                          std::make_error_code(std::errc::invalid_argument)),
+                      errorCodeToError(FileBufferOrErr.getError()));
+
+  yaml::Input In((*FileBufferOrErr)->getBuffer());
+  In >> Mapping;
+  if (In.error())
+    return make_error<StringError>(
+        Twine("Failed loading YAML document from '") + Filename + "'.",
+        In.error());
+  return Error::success();
+}
+
+// Packs a function ID and an object ID into a single 32-bit XRay ID.
+struct IdMappingHelper {
+  IdMappingHelper(int NumObjBits) : NumFnBits(32 - NumObjBits) {
+    assert(NumObjBits >= 0 && NumObjBits < 32 && "Invalid NumObjBits");
+    // 64-bit masks: a 32-bit shift by NumFnBits == 32 would be undefined.
+    ObjBitMask = (uint64_t(1) << NumObjBits) - 1;
+    FnBitMask = (uint64_t(1) << NumFnBits) - 1;
+  }
+
+  int32_t MapId(int32_t FnId, int32_t ObjId) const {
+    uint64_t Packed = ((ObjId & ObjBitMask) << NumFnBits) | (FnId & FnBitMask);
+    return static_cast<int32_t>(static_cast<uint32_t>(Packed));
+  }
+private:
+  int NumFnBits;
+  uint64_t ObjBitMask;
+  uint64_t FnBitMask;
+};
+
+void TranslateAndAppendSleds(const InstrumentationMap &Map,
+                             FuncIdConversionHelper &FH,
+                             int ObjId, const IdMappingHelper& IdMapping,
+                             std::vector<YAMLXRaySledEntry>& YAMLSleds) {
   auto Sleds = Map.sleds();
-  YAMLSleds.reserve(std::distance(Sleds.begin(), Sleds.end()));
+  auto SledCount = std::distance(Sleds.begin(), Sleds.end());
+  YAMLSleds.reserve(YAMLSleds.size() + SledCount);
   for (const auto &Sled : Sleds) {
     auto FuncId = Map.getFunctionId(Sled.Function);
     if (!FuncId)
       return;
+    auto MappedId = IdMapping.MapId(*FuncId, ObjId);
     YAMLSleds.push_back(
-        {*FuncId, Sled.Address, Sled.Function, Sled.Kind, Sled.AlwaysInstrument,
+        {MappedId, Sled.Address, Sled.Function, Sled.Kind, Sled.AlwaysInstrument,
          ExtractSymbolize ? FH.SymbolOrNumber(*FuncId) : "", Sled.Version});
   }
-  Output Out(OS, nullptr, 0);
-  Out << YAMLSleds;
 }
 
 } // namespace
 
 static CommandRegistration Unused(&Extract, []() -> Error {
-  auto InstrumentationMapOrError = loadInstrumentationMap(ExtractInput);
-  if (!InstrumentationMapOrError)
-    return joinErrors(make_error<StringError>(
-                          Twine("Cannot extract instrumentation map from '") +
-                              ExtractInput + "'.",
-                          std::make_error_code(std::errc::invalid_argument)),
-                      InstrumentationMapOrError.takeError());
+  int NumObjBits{0};
+  std::unordered_map<int, std::string> Inputs;
+  if (FromMapping) {
+    YAMLXRayObjectMapping ObjMapping;
+
+    auto Err = ReadObjectMappingYAML(ExtractInput, ObjMapping);
+    if (Err) {
+      return Err;
+    }
+    NumObjBits = ObjMapping.NumObjBits;
+    for (auto& Obj : ObjMapping.Objects) {
+      Inputs[Obj.ObjId] = Obj.Path;
+    }
+  } else {
+    Inputs[0] = ExtractInput;
+  }
+
+  IdMappingHelper IdMapping(NumObjBits);
+
+  symbolize::LLVMSymbolizer::Options opts;
+  if (Demangle.getPosition() < NoDemangle.getPosition())
+    opts.Demangle = false;
+  symbolize::LLVMSymbolizer Symbolizer(opts);
+
+  std::vector<YAMLXRaySledEntry> YAMLSleds;
+
+  for (auto& [ObjId, Path] : Inputs) {
+    auto InstrumentationMapOrError = loadInstrumentationMap(Path);
+    if (!InstrumentationMapOrError)
+      return joinErrors(make_error<StringError>(
+                            Twine("Cannot extract instrumentation map from '") +
+                                Path + "'.",
+                            std::make_error_code(std::errc::invalid_argument)),
+                        InstrumentationMapOrError.takeError());
+
+    const auto &FunctionAddresses =
+        InstrumentationMapOrError->getFunctionAddresses();
+
+    llvm::xray::FuncIdConversionHelper FuncIdHelper(Path, Symbolizer,
+                                                    FunctionAddresses);
+    TranslateAndAppendSleds(*InstrumentationMapOrError, FuncIdHelper,
+                            ObjId, IdMapping, YAMLSleds);
+  }
 
   std::error_code EC;
   raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::OF_TextWithCRLF);
   if (EC)
     return make_error<StringError>(
         Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC);
-  const auto &FunctionAddresses =
-      InstrumentationMapOrError->getFunctionAddresses();
-  symbolize::LLVMSymbolizer::Options opts;
-  if (Demangle.getPosition() < NoDemangle.getPosition())
-    opts.Demangle = false;
-  symbolize::LLVMSymbolizer Symbolizer(opts);
-  llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer,
-                                                  FunctionAddresses);
-  exportAsYAML(*InstrumentationMapOrError, OS, FuncIdHelper);
+  Output Out(OS, nullptr, 0);
+  Out << YAMLSleds;
   return Error::success();
 });
+
+



More information about the llvm-commits mailing list