[compiler-rt] [llvm] Introduce XRay object mapping file to enable symbolization for DSOs (PR #133269)
Sebastian Kreutzer via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 27 08:56:11 PDT 2025
https://github.com/sebastiankreutzer created https://github.com/llvm/llvm-project/pull/133269
Prototype implementation for DSO function resolution for XRay.
- Adds the `__xray_object_path` API function to look up the binary path of a loaded DSO
- At the end of tracing, writes out a YAML file, containing the mapping of object IDs to binaries
- Extends `llvm-xray extract` to allow generating a combined instrumentation map file, given the mapping YAML as input
>From bc0ae8d83cc0a297b26b4dcb64814347a6b15d8b Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Thu, 27 Mar 2025 16:42:20 +0100
Subject: [PATCH] [XRay] Prototype implementation of XRay object mapping file
to enable symbolization for DSOs
---
compiler-rt/include/xray/xray_interface.h | 6 +
compiler-rt/lib/xray/xray_basic_logging.cpp | 40 ++++-
compiler-rt/lib/xray/xray_init.cpp | 63 +++++++
compiler-rt/lib/xray/xray_interface.cpp | 35 ++++
.../lib/xray/xray_interface_internal.h | 3 +
compiler-rt/lib/xray/xray_utils.cpp | 8 +-
compiler-rt/lib/xray/xray_utils.h | 7 +-
llvm/tools/llvm-xray/xray-extract.cpp | 158 +++++++++++++++---
8 files changed, 287 insertions(+), 33 deletions(-)
diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
index 675ea0cbc48c8..f8b07df688989 100644
--- a/compiler-rt/include/xray/xray_interface.h
+++ b/compiler-rt/include/xray/xray_interface.h
@@ -165,6 +165,12 @@ extern int32_t __xray_unpack_object_id(int32_t PackedId);
/// high bits are truncated.
extern int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId);
+/// Returns the path from which the given object (executable or DSO) was
+/// loaded.
+/// The returned string is owned by the XRay runtime and remains valid until the
+/// end of execution.
+extern const char* __xray_object_path(int32_t ObjId);
+
/// Initialize the required XRay data structures. This is useful in cases where
/// users want to control precisely when the XRay instrumentation data
/// structures are initialized, for example when the XRay library is built with
diff --git a/compiler-rt/lib/xray/xray_basic_logging.cpp b/compiler-rt/lib/xray/xray_basic_logging.cpp
index 6ac5417bef754..c11b9bd2d748e 100644
--- a/compiler-rt/lib/xray/xray_basic_logging.cpp
+++ b/compiler-rt/lib/xray/xray_basic_logging.cpp
@@ -439,15 +439,42 @@ XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize,
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
+// Writes the object-ID-to-path mapping next to the current log file. The
+// mapping file name is the log's file name with ".map.yaml" appended.
+// Reports a diagnostic and returns without writing if no global log is
+// available or if the resulting file name does not fit the local buffer.
+static void writeObjectMapping() XRAY_NEVER_INSTRUMENT {
+  LogWriter *LW = getGlobalLog();
+  if (!LW) {
+    Report("Log file was not initialized!\n");
+    return;
+  }
+
+  if (Verbosity())
+    Report("Writing object mapping file.\n");
+
+  char MapFilename[256] = {};
+  int NeededLength = internal_snprintf(MapFilename, sizeof(MapFilename),
+                                       "%s.map.yaml", LW->GetFilename());
+  // The returned length excludes the terminating NUL, so a value equal to the
+  // buffer size already means the name was truncated.
+  if (NeededLength < 0 || NeededLength >= int(sizeof(MapFilename))) {
+    Report("XRay map file name too long (%d): %s\n", NeededLength, MapFilename);
+    return;
+  }
+
+  if (!__xray_write_object_mapping(MapFilename))
+    Report("Failed to write the XRay object mapping to %s\n", MapFilename);
+}
+
XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
uint8_t Expected = 0;
if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
- memory_order_acq_rel) &&
- Verbosity())
- Report("Basic logging already finalized.\n");
+ memory_order_acq_rel)) {
+ if (Verbosity())
+ Report("Basic logging already finalized.\n");
+ return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+ }
- // Nothing really to do aside from marking state of the global to be
- // uninitialized.
+ // Write the object mapping file. This point is only reached when the
+ // compare-exchange above succeeded, i.e. BasicInitialized held 0.
+ // NOTE(review): confirm that 0 is the state in which the mapping should be
+ // written; the failing branch returns early without writing it.
+ writeObjectMapping();
return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}
@@ -462,6 +489,7 @@ XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
}
+
bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
XRayLogImpl Impl{
basicLoggingInit,
@@ -504,7 +532,7 @@ bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
pthread_once(&DynamicOnce, +[] {
static void *FakeTLD = nullptr;
FakeTLD = &getThreadLocalData();
- Atexit(+[] { TLDDestructor(FakeTLD); });
+ Atexit(+[] { TLDDestructor(FakeTLD); writeObjectMapping(); });
});
}
return true;
diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index 020bfe52b5320..abc24120e4ac6 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -12,9 +12,14 @@
//===----------------------------------------------------------------------===//
#include <fcntl.h>
+#include <string.h>
#include <strings.h>
#include <unistd.h>
+#ifdef __ELF__
+#include <link.h>
+#endif
+
#include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_interface.h"
#include "xray_allocator.h"
@@ -62,6 +67,56 @@ atomic_uint8_t XRayFlagsInitialized{0};
// A mutex to allow only one thread to initialize the XRay data structures.
SpinMutex XRayInitMutex;
+
+namespace {
+
+// State shared between DetectObjPath and its dl_iterate_phdr callback.
+struct DlIteratePhdrData {
+  // Address to search for among the PT_LOAD segments of the loaded objects.
+  uintptr_t TargetSledAddr{0};
+  // Name reported by the loader for the object containing the address.
+  const char *Path{nullptr};
+  // Set once a containing segment has been found.
+  bool Found{false};
+};
+
+// Returns the path of the object that contains Addr, or nullptr if it cannot
+// be determined. For DSOs the loaded objects are searched (ELF only); for the
+// main executable the process name tracked by sanitizer_common is returned.
+// The returned pointer is not owned by the caller.
+const char *DetectObjPath(bool IsDSO, uint64_t Addr) XRAY_NEVER_INSTRUMENT {
+  if (IsDSO) {
+    // Detection of DSO paths is only supported for ELF.
+#ifdef __ELF__
+    // Look for the given address by iterating over the loaded DSOs.
+    DlIteratePhdrData Data;
+    Data.TargetSledAddr = static_cast<uintptr_t>(Addr);
+    dl_iterate_phdr(
+        [](dl_phdr_info *Info, size_t, void *Arg) -> int {
+          auto *Lookup = static_cast<DlIteratePhdrData *>(Arg);
+          for (int I = 0; I < Info->dlpi_phnum; I++) {
+            const auto &Phdr = Info->dlpi_phdr[I];
+            if (Phdr.p_type != PT_LOAD)
+              continue;
+            // Addresses are kept unsigned so that segments in the upper half
+            // of the address space compare correctly and the end computation
+            // cannot overflow a signed type.
+            const uintptr_t Beg = Info->dlpi_addr + Phdr.p_vaddr;
+            const uintptr_t End = Beg + Phdr.p_memsz;
+            if (Beg <= Lookup->TargetSledAddr &&
+                Lookup->TargetSledAddr < End) {
+              Lookup->Path = Info->dlpi_name;
+              Lookup->Found = true;
+              // A non-zero return value ends the iteration.
+              return 1;
+            }
+          }
+          return 0;
+        },
+        &Data);
+    if (Data.Found)
+      return Data.Path;
+#endif
+    return nullptr;
+  }
+
+  // If the address is in the main executable, use the built-in sanitizer
+  // functionality.
+  __sanitizer::UpdateProcessName();
+  // TODO: Is this the full path?
+  return __sanitizer::GetProcessName();
+}
+} // namespace
+
// Registers XRay sleds and trampolines coming from the main executable or one
// of the linked DSOs.
// Returns the object ID if registration is successful, -1 otherwise.
@@ -107,6 +162,14 @@ __xray_register_sleds(const XRaySledEntry *SledsBegin,
if (Verbosity())
Report("Registering %d new functions!\n", SledMap.Functions);
+ const char* Path = DetectObjPath(FromDSO, SledsBegin->address());
+ if (Path) {
+ // Copy needed because DSO path is freed when unloaded.
+ SledMap.Path = internal_strdup(Path);
+ } else {
+ Report("Unable to determine load path for address %x\n", SledsBegin->address());
+ }
+
{
SpinMutexLock Guard(&XRayInstrMapMutex);
auto Idx = atomic_fetch_add(&XRayNumObjects, 1, memory_order_acq_rel);
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 3f97827874a70..7605ce743f1e6 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -688,3 +688,38 @@ int32_t __xray_unpack_object_id(int32_t PackedId) {
int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId) {
return __xray::MakePackedId(FuncId, ObjId);
}
+
+// Looks up the load path recorded for the object with the given ID.
+// Returns nullptr when ObjId is negative or not below the current number of
+// registered objects. The lookup is done while holding XRayInstrMapMutex.
+const char *__xray_object_path(int32_t ObjId) {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (ObjId < 0)
+    return nullptr;
+  const auto NumObjects = atomic_load(&XRayNumObjects, memory_order_acquire);
+  const bool InRange = static_cast<uint32_t>(ObjId) < NumObjects;
+  return InRange ? XRayInstrMaps[ObjId].Path : nullptr;
+}
+
+#include <stdio.h>
+
+// Writes a YAML document to `outfile` that lists, for every registered object
+// ID, the path reported by __xray_object_path, together with the number of
+// object bits used in packed IDs.
+// Objects without a known path are written with an empty path and reported.
+// Returns false if the file cannot be opened or if writing/closing it fails.
+bool __xray_write_object_mapping(const char *outfile) XRAY_NEVER_INSTRUMENT {
+  // TODO: Use low-level IO API?
+  FILE *of = fopen(outfile, "w");
+  if (!of) {
+    Report("Unable to write object mapping to file %s\n", outfile);
+    return false;
+  }
+
+  fprintf(of, "--- !XRayMapping\n");
+  fprintf(of, "num_object_bits: %d\n", __xray::XRayNObjBits);
+  fprintf(of, "objects:\n");
+  const int NumObjects = __xray_num_objects();
+  for (int ObjId = 0; ObjId < NumObjects; ObjId++) {
+    const char *Path = __xray_object_path(ObjId);
+    if (!Path)
+      Report("Unknown path for object %d. The XRay mapping file may be "
+             "incomplete.\n", ObjId);
+    // "path" must line up with "id" so that both keys belong to the same
+    // sequence entry.
+    fprintf(of, " - id: %d\n", ObjId);
+    fprintf(of, "   path: \"%s\"\n", Path ? Path : "");
+  }
+  fprintf(of, "...\n");
+
+  // Buffered output may only fail when it is flushed, so check the stream
+  // state and the result of closing it. The file is always closed.
+  const bool WriteFailed = ferror(of) != 0;
+  const bool CloseFailed = fclose(of) != 0;
+  return !WriteFailed && !CloseFailed;
+}
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index 5dcccfe825cf5..5d3e4b435b25e 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -106,6 +106,8 @@ extern int32_t __xray_register_dso(const XRaySledEntry *SledsBegin,
XRayTrampolines Trampolines);
extern bool __xray_deregister_dso(int32_t ObjId);
+
+extern bool __xray_write_object_mapping(const char* outfile);
}
namespace __xray {
@@ -137,6 +139,7 @@ struct XRaySledMap {
XRayTrampolines Trampolines;
bool FromDSO;
bool Loaded;
+ const char* Path;
};
bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
diff --git a/compiler-rt/lib/xray/xray_utils.cpp b/compiler-rt/lib/xray/xray_utils.cpp
index 5d51df9937c2c..7563bc18a2db2 100644
--- a/compiler-rt/lib/xray/xray_utils.cpp
+++ b/compiler-rt/lib/xray/xray_utils.cpp
@@ -124,7 +124,7 @@ LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT {
Report("XRay: " FORMAT_DUMPFILE "\n", ProfileSinkName, VmoName);
LogWriter *LW = reinterpret_cast<LogWriter *>(InternalAlloc(sizeof(LogWriter)));
- new (LW) LogWriter(Vmo);
+ new (LW) LogWriter(Vmo, VmoName);
return LW;
}
@@ -155,6 +155,10 @@ void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUME
}
}
+// Returns a pointer to this writer's stored Filename buffer.
+const char *LogWriter::GetFilename() XRAY_NEVER_INSTRUMENT {
+  const char *Name = this->Filename;
+  return Name;
+}
+
void LogWriter::Flush() XRAY_NEVER_INSTRUMENT {
fsync(Fd);
}
@@ -187,7 +191,7 @@ LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT {
Report("XRay: Log file in '%s'\n", TmpFilename);
LogWriter *LW = allocate<LogWriter>();
- new (LW) LogWriter(Fd);
+ new (LW) LogWriter(Fd, TmpFilename);
return LW;
}
diff --git a/compiler-rt/lib/xray/xray_utils.h b/compiler-rt/lib/xray/xray_utils.h
index 5dc73d7fa8cde..309e4d8be8e33 100644
--- a/compiler-rt/lib/xray/xray_utils.h
+++ b/compiler-rt/lib/xray/xray_utils.h
@@ -31,13 +31,17 @@ class LogWriter {
#if SANITIZER_FUCHSIA
LogWriter(zx_handle_t Vmo) : Vmo(Vmo) {}
#else
- explicit LogWriter(int Fd) : Fd(Fd) {}
+ explicit LogWriter(int Fd, const char* Filename) : Fd(Fd) {
+ internal_strlcpy(this->Filename, Filename, sizeof(this->Filename));
+ }
#endif
~LogWriter();
// Write a character range into a log.
void WriteAll(const char *Begin, const char *End);
+ const char* GetFilename();
+
void Flush();
// Returns a new log instance initialized using the flag-provided values.
@@ -52,6 +56,7 @@ class LogWriter {
#else
int Fd = -1;
#endif
+ char Filename[256];
};
constexpr size_t gcd(size_t a, size_t b) {
diff --git a/llvm/tools/llvm-xray/xray-extract.cpp b/llvm/tools/llvm-xray/xray-extract.cpp
index 52767a00f6152..394c4f70516f8 100644
--- a/llvm/tools/llvm-xray/xray-extract.cpp
+++ b/llvm/tools/llvm-xray/xray-extract.cpp
@@ -21,8 +21,11 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/XRay/InstrumentationMap.h"
+#include <assert.h>
+#include <map>
+
using namespace llvm;
using namespace llvm::xray;
using namespace llvm::yaml;
@@ -51,51 +54,158 @@ static cl::opt<bool> Demangle("demangle",
static cl::opt<bool> NoDemangle("no-demangle",
cl::desc("don't demangle symbols"),
cl::sub(Extract));
+static cl::opt<bool> FromMapping("mapping", cl::init(false),
+ cl::desc("Create instrumentation map from object map YAML"),
+ cl::sub(Extract));
namespace {
-void exportAsYAML(const InstrumentationMap &Map, raw_ostream &OS,
- FuncIdConversionHelper &FH) {
- // First we translate the sleds into the YAMLXRaySledEntry objects in a deque.
- std::vector<YAMLXRaySledEntry> YAMLSleds;
+// One entry of the object mapping file: an object ID and the path of the
+// binary registered under that ID.
+struct YAMLXRayObjectMapEntry {
+  // Initialized so the value is well-defined even before it is mapped.
+  int32_t ObjId{0};
+  std::string Path;
+};
+
+// Top-level document of the object mapping file: the number of bits used for
+// the object ID in packed IDs, and the list of known objects.
+struct YAMLXRayObjectMapping {
+  // Initialized so the value is well-defined even before it is mapped.
+  int NumObjBits{0};
+  std::vector<YAMLXRayObjectMapEntry> Objects;
+};
+
+}
+
+// Declared first so that std::vector<YAMLXRayObjectMapEntry> is already known
+// to be a YAML sequence when the MappingTraits below refer to it, rather than
+// relying on the compiler deferring template instantiation.
+LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLXRayObjectMapEntry)
+
+namespace llvm {
+namespace yaml {
+// Maps the "id" and "path" keys of one object entry; both are required.
+template <> struct MappingTraits<YAMLXRayObjectMapEntry> {
+  static void mapping(IO &Io, YAMLXRayObjectMapEntry &Entry) {
+    Io.mapRequired("id", Entry.ObjId);
+    Io.mapRequired("path", Entry.Path);
+  }
+};
+
+// Maps the top-level "num_object_bits" and "objects" keys; both are required.
+template <> struct MappingTraits<YAMLXRayObjectMapping> {
+  static void mapping(IO &Io, YAMLXRayObjectMapping &Mapping) {
+    Io.mapRequired("num_object_bits", Mapping.NumObjBits);
+    Io.mapRequired("objects", Mapping.Objects);
+  }
+};
+} // end namespace yaml
+} // end namespace llvm
+
+namespace {
+
+// Reads the object mapping YAML document stored in `Filename` into `Mapping`.
+// Returns an error if the file cannot be read or if the YAML document cannot
+// be parsed into a YAMLXRayObjectMapping.
+Error ReadObjectMappingYAML(StringRef Filename,
+                            YAMLXRayObjectMapping &Mapping) {
+  // Use the Filename parameter rather than the global command-line option so
+  // that the file that is read is the one named in the diagnostics below.
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileBufferOrErr =
+      llvm::MemoryBuffer::getFile(Filename);
+  if (!FileBufferOrErr) {
+    return joinErrors(
+        make_error<StringError>(
+            Twine("Cannot read object mapping YAML from '") + Filename + "'.",
+            std::make_error_code(std::errc::invalid_argument)),
+        errorCodeToError(FileBufferOrErr.getError()));
+  }
+
+  yaml::Input In((*FileBufferOrErr)->getBuffer());
+  In >> Mapping;
+  if (In.error())
+    return make_error<StringError>(
+        Twine("Failed loading YAML document from '") + Filename + "'.",
+        In.error());
+  return Error::success();
+}
+
+// Packs a function ID and an object ID into a single 32-bit ID. The object ID
+// occupies the top NumObjBits bits and the function ID the remaining low bits.
+struct IdMappingHelper {
+  // NumObjBits must be in [0, 32). With 0 object bits the packed ID is the
+  // function ID itself.
+  IdMappingHelper(int NumObjBits) : NumObjBits(NumObjBits) {
+    assert(NumObjBits >= 0 && NumObjBits < 32 && "Invalid NumObjBits");
+    NumFnBits = 32 - NumObjBits;
+    // Masks are computed in 64 bits because NumFnBits can be 32, and shifting
+    // a 32-bit value by 32 is undefined.
+    ObjBitMask = static_cast<uint32_t>((uint64_t{1} << NumObjBits) - 1);
+    FnBitMask = static_cast<uint32_t>((uint64_t{1} << NumFnBits) - 1);
+  }
+
+  // Returns the packed ID; bits of either input outside its field are dropped.
+  int32_t MapId(int32_t FnId, int32_t ObjId) const {
+    const uint64_t Obj = static_cast<uint32_t>(ObjId) & ObjBitMask;
+    const uint64_t Fn = static_cast<uint32_t>(FnId) & FnBitMask;
+    // Shift in 64 bits, then keep the low 32 bits of the result.
+    return static_cast<int32_t>(
+        static_cast<uint32_t>((Obj << NumFnBits) | Fn));
+  }
+
+private:
+  int NumObjBits;
+  int NumFnBits;
+  uint32_t ObjBitMask;
+  uint32_t FnBitMask;
+};
+
+
+// Appends one YAMLXRaySledEntry to YAMLSleds for each sled in Map. The entry's
+// function ID is the value returned by IdMapping.MapId for the sled's function
+// ID and ObjId; the symbol name is looked up through FH using the unmapped
+// function ID when ExtractSymbolize is set.
+// Returns at the first sled whose function ID cannot be resolved; entries
+// appended before that point stay in YAMLSleds.
+void TranslateAndAppendSleds(const InstrumentationMap &Map,
+ FuncIdConversionHelper &FH,
+ int ObjId, const IdMappingHelper& IdMapping,
+ std::vector<YAMLXRaySledEntry>& YAMLSleds) {
auto Sleds = Map.sleds();
- YAMLSleds.reserve(std::distance(Sleds.begin(), Sleds.end()));
+ auto SledCount = std::distance(Sleds.begin(), Sleds.end());
+ YAMLSleds.reserve(YAMLSleds.size() + SledCount);
for (const auto &Sled : Sleds) {
auto FuncId = Map.getFunctionId(Sled.Function);
if (!FuncId)
return;
+ auto MappedId = IdMapping.MapId(*FuncId, ObjId);
YAMLSleds.push_back(
- {*FuncId, Sled.Address, Sled.Function, Sled.Kind, Sled.AlwaysInstrument,
+ {MappedId, Sled.Address, Sled.Function, Sled.Kind, Sled.AlwaysInstrument,
ExtractSymbolize ? FH.SymbolOrNumber(*FuncId) : "", Sled.Version});
}
- Output Out(OS, nullptr, 0);
- Out << YAMLSleds;
}
} // namespace
+// Handler for the `extract` subcommand. Collects the inputs (either the single
+// ExtractInput binary, or every object listed in the mapping YAML when
+// FromMapping is set), loads the instrumentation map of each one, and writes
+// the combined sled list as YAML to ExtractOutput.
static CommandRegistration Unused(&Extract, []() -> Error {
- auto InstrumentationMapOrError = loadInstrumentationMap(ExtractInput);
- if (!InstrumentationMapOrError)
- return joinErrors(make_error<StringError>(
- Twine("Cannot extract instrumentation map from '") +
- ExtractInput + "'.",
- std::make_error_code(std::errc::invalid_argument)),
- InstrumentationMapOrError.takeError());
+ int NumObjBits{0};
+ // Ordered by object ID so that the emitted sled list is deterministic.
+ std::map<int, std::string> Inputs;
+ if (FromMapping) {
+ YAMLXRayObjectMapping ObjMapping;
+
+ auto Err = ReadObjectMappingYAML(ExtractInput, ObjMapping);
+ if (Err) {
+ return Err;
+ }
+ NumObjBits = ObjMapping.NumObjBits;
+ // The value comes from a file, so reject it here instead of relying on the
+ // assertion in IdMappingHelper.
+ if (NumObjBits < 0 || NumObjBits >= 32)
+ return make_error<StringError>(
+ Twine("Invalid num_object_bits in '") + ExtractInput + "'.",
+ std::make_error_code(std::errc::invalid_argument));
+ for (const auto &Obj : ObjMapping.Objects) {
+ Inputs[Obj.ObjId] = Obj.Path;
+ }
+ } else {
+ Inputs[0] = ExtractInput;
+ }
+
+ IdMappingHelper IdMapping(NumObjBits);
+
+ symbolize::LLVMSymbolizer::Options opts;
+ if (Demangle.getPosition() < NoDemangle.getPosition())
+ opts.Demangle = false;
+ symbolize::LLVMSymbolizer Symbolizer(opts);
+
+ std::vector<YAMLXRaySledEntry> YAMLSleds;
+
+ for (const auto &[ObjId, Path] : Inputs) {
+ auto InstrumentationMapOrError = loadInstrumentationMap(Path);
+ if (!InstrumentationMapOrError)
+ return joinErrors(make_error<StringError>(
+ Twine("Cannot extract instrumentation map from '") +
+ Path + "'.",
+ std::make_error_code(std::errc::invalid_argument)),
+ InstrumentationMapOrError.takeError());
+
+ const auto &FunctionAddresses =
+ InstrumentationMapOrError->getFunctionAddresses();
+
+ llvm::xray::FuncIdConversionHelper FuncIdHelper(Path, Symbolizer,
+ FunctionAddresses);
+ TranslateAndAppendSleds(*InstrumentationMapOrError, FuncIdHelper,
+ ObjId, IdMapping, YAMLSleds);
+ }
std::error_code EC;
raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::OF_TextWithCRLF);
if (EC)
return make_error<StringError>(
Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC);
- const auto &FunctionAddresses =
- InstrumentationMapOrError->getFunctionAddresses();
- symbolize::LLVMSymbolizer::Options opts;
- if (Demangle.getPosition() < NoDemangle.getPosition())
- opts.Demangle = false;
- symbolize::LLVMSymbolizer Symbolizer(opts);
- llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer,
- FunctionAddresses);
- exportAsYAML(*InstrumentationMapOrError, OS, FuncIdHelper);
+ Output Out(OS, nullptr, 0);
+ Out << YAMLSleds;
return Error::success();
});
+
+
More information about the llvm-commits
mailing list