[clang] [compiler-rt] Reapply " [XRay] Add support for instrumentation of DSOs on x86_64 (#90959)" (PR #112930)

Sebastian Kreutzer via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 18 09:47:10 PDT 2024


https://github.com/sebastiankreutzer updated https://github.com/llvm/llvm-project/pull/112930

>From 7e4508ff9e5e5ef7d77aee40a7d0d4e21106d353 Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Thu, 26 Oct 2023 15:13:05 +0200
Subject: [PATCH 1/3] [XRay] Add DSO support for XRay instrumentation on X86_64

---
 clang/include/clang/Basic/CodeGenOptions.def  |   2 +
 clang/include/clang/Driver/Options.td         |   5 +
 clang/include/clang/Driver/XRayArgs.h         |   4 +
 clang/lib/Driver/ToolChains/CommonArgs.cpp    |  12 +-
 clang/lib/Driver/XRayArgs.cpp                 |  21 ++
 clang/test/Driver/XRay/xray-shared.cpp        |  17 +
 .../cmake/Modules/AllSupportedArchDefs.cmake  |   1 +
 compiler-rt/cmake/config-ix.cmake             |   4 +
 compiler-rt/include/xray/xray_interface.h     |  55 +++-
 compiler-rt/lib/xray/CMakeLists.txt           |  86 +++++-
 compiler-rt/lib/xray/xray_dso_init.cpp        |  62 ++++
 compiler-rt/lib/xray/xray_init.cpp            | 183 +++++++++--
 compiler-rt/lib/xray/xray_interface.cpp       | 292 ++++++++++++++----
 .../lib/xray/xray_interface_internal.h        |  83 ++++-
 compiler-rt/lib/xray/xray_trampoline_x86_64.S |  24 +-
 compiler-rt/lib/xray/xray_x86_64.cpp          |  23 +-
 .../xray/TestCases/Posix/basic-mode-dso.cpp   |  47 +++
 .../TestCases/Posix/clang-xray-shared.cpp     |  14 +
 .../test/xray/TestCases/Posix/dlopen.cpp      | 107 +++++++
 .../xray/TestCases/Posix/dso-dep-chains.cpp   | 197 ++++++++++++
 .../TestCases/Posix/patch-premain-dso.cpp     |  45 +++
 .../Posix/patching-unpatching-dso.cpp         |  75 +++++
 22 files changed, 1215 insertions(+), 144 deletions(-)
 create mode 100644 clang/test/Driver/XRay/xray-shared.cpp
 create mode 100644 compiler-rt/lib/xray/xray_dso_init.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp

diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index eac831278ee20d..e45370bde74a5d 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -136,6 +136,8 @@ CODEGENOPT(XRayIgnoreLoops , 1, 0)
 ///< Emit the XRay function index section.
 CODEGENOPT(XRayFunctionIndex , 1, 1)
 
+///< Set when -fxray-shared is enabled
+CODEGENOPT(XRayShared , 1, 0)
 
 ///< Set the minimum number of instructions in a function to determine selective
 ///< XRay instrumentation.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 152c43d7908ff8..6748f7045566aa 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2948,6 +2948,11 @@ def fxray_selected_function_group :
   HelpText<"When using -fxray-function-groups, select which group of functions to instrument. Valid range is 0 to fxray-function-groups - 1">,
   MarshallingInfoInt<CodeGenOpts<"XRaySelectedFunctionGroup">, "0">;
 
+defm xray_shared : BoolFOption<"xray-shared",
+  CodeGenOpts<"XRayShared">, DefaultFalse,
+  PosFlag<SetTrue, [], [ClangOption, CC1Option],
+          "Enable shared library instrumentation with XRay">,
+  NegFlag<SetFalse>>;
 
 defm fine_grained_bitfield_accesses : BoolOption<"f", "fine-grained-bitfield-accesses",
   CodeGenOpts<"FineGrainedBitfieldAccesses">, DefaultFalse,
diff --git a/clang/include/clang/Driver/XRayArgs.h b/clang/include/clang/Driver/XRayArgs.h
index bdd3d979547eed..8fbcf469e5bad1 100644
--- a/clang/include/clang/Driver/XRayArgs.h
+++ b/clang/include/clang/Driver/XRayArgs.h
@@ -27,6 +27,7 @@ class XRayArgs {
   XRayInstrSet InstrumentationBundle;
   llvm::opt::Arg *XRayInstrument = nullptr;
   bool XRayRT = true;
+  bool XRayShared = false;
 
 public:
   /// Parses the XRay arguments from an argument list.
@@ -35,6 +36,9 @@ class XRayArgs {
                llvm::opt::ArgStringList &CmdArgs, types::ID InputType) const;
 
   bool needsXRayRt() const { return XRayInstrument && XRayRT; }
+  bool needsXRayDSORt() const {
+    return XRayInstrument && XRayRT && XRayShared;
+  }
   llvm::ArrayRef<std::string> modeList() const { return Modes; }
   XRayInstrSet instrumentationBundle() const { return InstrumentationBundle; }
 };
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 91605a67a37fc0..1c3c8c816594e5 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1623,10 +1623,14 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
 }
 
 bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
-  if (Args.hasArg(options::OPT_shared))
-    return false;
-
-  if (TC.getXRayArgs().needsXRayRt()) {
+  if (Args.hasArg(options::OPT_shared)) {
+    if (TC.getXRayArgs().needsXRayDSORt()) {
+      CmdArgs.push_back("--whole-archive");
+      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray-dso"));
+      CmdArgs.push_back("--no-whole-archive");
+      return true;
+    }
+  } else if (TC.getXRayArgs().needsXRayRt()) {
     CmdArgs.push_back("--whole-archive");
     CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray"));
     for (const auto &Mode : TC.getXRayArgs().modeList())
diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp
index 8c5134e2501358..411054e067cb42 100644
--- a/clang/lib/Driver/XRayArgs.cpp
+++ b/clang/lib/Driver/XRayArgs.cpp
@@ -63,6 +63,23 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
         << XRayInstrument->getSpelling() << Triple.str();
   }
 
+  if (Args.hasFlag(options::OPT_fxray_shared,
+                   options::OPT_fno_xray_shared, false)) {
+    XRayShared = true;
+
+    // DSO instrumentation is currently limited to x86_64
+    if (Triple.getArch() != llvm::Triple::x86_64) {
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << "-fxray-shared" << Triple.str();
+    }
+
+    unsigned PICLvl = std::get<1>(tools::ParsePICArgs(TC, Args));
+    if (!PICLvl) {
+      D.Diag(diag::err_opt_not_valid_without_opt)
+          << "-fxray-shared" << "-fPIC";
+    }
+  }
+
   // Both XRay and -fpatchable-function-entry use
   // TargetOpcode::PATCHABLE_FUNCTION_ENTER.
   if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ))
@@ -177,6 +194,10 @@ void XRayArgs::addArgs(const ToolChain &TC, const ArgList &Args,
   Args.addOptOutFlag(CmdArgs, options::OPT_fxray_function_index,
                      options::OPT_fno_xray_function_index);
 
+  if (XRayShared)
+    Args.addOptInFlag(CmdArgs, options::OPT_fxray_shared,
+                      options::OPT_fno_xray_shared);
+
   if (const Arg *A =
           Args.getLastArg(options::OPT_fxray_instruction_threshold_EQ)) {
     int Value;
diff --git a/clang/test/Driver/XRay/xray-shared.cpp b/clang/test/Driver/XRay/xray-shared.cpp
new file mode 100644
index 00000000000000..215854e1fc7cef
--- /dev/null
+++ b/clang/test/Driver/XRay/xray-shared.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fpic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-PIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-pic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
+
+// On 64 bit darwin, PIC is always enabled
+// RUN: %clang -### --target=x86_64-apple-darwin -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+
+// Check unsupported targets
+// RUN: not %clang -### --target=aarch64-pc-freebsd -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
+// RUN: not %clang -### --target=arm64-apple-macos -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
+
+// CHECK: "-cc1" {{.*}}"-fxray-instrument" {{.*}}"-fxray-shared"
+// ERR-TARGET:   error: unsupported option '-fxray-shared' for target
+// ERR-PIC:   error: option '-fxray-shared' cannot be specified without '-fPIC'
+
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index d00d39518104bf..fb4dfa7bd09dfe 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -104,6 +104,7 @@ else()
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
 		powerpc64le ${HEXAGON} ${LOONGARCH64})
 endif()
+set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64})
 set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
 
 if (UNIX)
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index a494e0532a50bc..431f544e8ad6a7 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -668,6 +668,9 @@ if(APPLE)
   list_intersect(XRAY_SUPPORTED_ARCH
     ALL_XRAY_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(XRAY_DSO_SUPPORTED_ARCH
+    ALL_XRAY_DSO_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
   list_intersect(SHADOWCALLSTACK_SUPPORTED_ARCH
     ALL_SHADOWCALLSTACK_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
@@ -702,6 +705,7 @@ else()
   filter_available_targets(CFI_SUPPORTED_ARCH ${ALL_CFI_SUPPORTED_ARCH})
   filter_available_targets(SCUDO_STANDALONE_SUPPORTED_ARCH ${ALL_SCUDO_STANDALONE_SUPPORTED_ARCH})
   filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
+  filter_available_targets(XRAY_DSO_SUPPORTED_ARCH ${ALL_XRAY_DSO_SUPPORTED_ARCH})
   filter_available_targets(SHADOWCALLSTACK_SUPPORTED_ARCH
     ${ALL_SHADOWCALLSTACK_SUPPORTED_ARCH})
   filter_available_targets(GWP_ASAN_SUPPORTED_ARCH ${ALL_GWP_ASAN_SUPPORTED_ARCH})
diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
index 727431c04e4f73..717cfe292ce416 100644
--- a/compiler-rt/include/xray/xray_interface.h
+++ b/compiler-rt/include/xray/xray_interface.h
@@ -93,31 +93,74 @@ enum XRayPatchingStatus {
   FAILED = 3,
 };
 
-/// This tells XRay to patch the instrumentation points. See XRayPatchingStatus
+/// This tells XRay to patch the instrumentation points in all currently loaded objects. See XRayPatchingStatus
 /// for possible result values.
 extern XRayPatchingStatus __xray_patch();
 
+/// This tells XRay to patch the instrumentation points in the given object.
+/// See XRayPatchingStatus for possible result values.
+extern XRayPatchingStatus __xray_patch_object(int32_t ObjId);
+
 /// Reverses the effect of __xray_patch(). See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_unpatch();
 
-/// This patches a specific function id. See XRayPatchingStatus for possible
+/// Reverses the effect of __xray_patch_object. See XRayPatchingStatus for possible
+/// result values.
+extern XRayPatchingStatus __xray_unpatch_object(int32_t ObjId);
+
+/// This unpacks the given (packed) function id and patches
+/// the corresponding function.  See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_patch_function(int32_t FuncId);
 
-/// This unpatches a specific function id. See XRayPatchingStatus for possible
+/// This patches a specific function in the given object. See XRayPatchingStatus for possible
+/// result values.
+extern XRayPatchingStatus __xray_patch_function_in_object(int32_t FuncId,
+                                                          int32_t ObjId);
+
+/// This unpacks the given (packed) function id and unpatches
+/// the corresponding function. See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_unpatch_function(int32_t FuncId);
 
-/// This function returns the address of the function provided a valid function
-/// id. We return 0 if we encounter any error, even if 0 may be a valid function
+/// This unpatches a specific function in the given object.
+/// See XRayPatchingStatus for possible result values.
+extern XRayPatchingStatus __xray_unpatch_function_in_object(int32_t FuncId,
+                                                            int32_t ObjId);
+
+/// This function unpacks the given (packed) function id and returns the address of the corresponding function. We return 0 if we encounter any error, even if 0 may be a valid function
 /// address.
 extern uintptr_t __xray_function_address(int32_t FuncId);
 
-/// This function returns the maximum valid function id. Returns 0 if we
+/// This function returns the address of the function in the given object provided valid function and object
+/// ids. We return 0 if we encounter any error, even if 0 may be a valid function
+/// address.
+extern uintptr_t __xray_function_address_in_object(int32_t FuncId,
+                                                   int32_t ObjId);
+
+/// This function returns the maximum valid function id for the main executable (object id = 0). Returns 0 if we
 /// encounter errors (when there are no instrumented functions, etc.).
 extern size_t __xray_max_function_id();
 
+/// This function returns the maximum valid function id for the given object. Returns 0 if we
+/// encounter errors (when there are no instrumented functions, etc.).
+extern size_t __xray_max_function_id_in_object(int32_t ObjId);
+
+/// This function returns the number of previously registered objects (executable + loaded DSOs).
+/// Returns 0 if XRay has not been initialized.
+extern size_t __xray_num_objects();
+
+/// Unpacks the function id from the given packed id.
+extern int32_t __xray_unpack_function_id(int32_t PackedId);
+
+/// Unpacks the object id from the given packed id.
+extern int32_t __xray_unpack_object_id(int32_t PackedId);
+
+/// Creates and returns a packed id from the given function and object ids.
+/// If the ids do not fit within the reserved number of bits for each part, the high bits are truncated.
+extern int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId);
+
 /// Initialize the required XRay data structures. This is useful in cases where
 /// users want to control precisely when the XRay instrumentation data
 /// structures are initialized, for example when the XRay library is built with
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index cf7b5062aae32d..f38c07420c9abf 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -10,6 +10,10 @@ set(XRAY_SOURCES
   xray_utils.cpp
   )
 
+set(XRAY_DSO_SOURCES
+  xray_dso_init.cpp
+  )
+
 # Implementation files for all XRay modes.
 set(XRAY_FDR_MODE_SOURCES
   xray_fdr_flags.cpp
@@ -33,6 +37,11 @@ set(x86_64_SOURCES
   xray_trampoline_x86_64.S
   )
 
+set(x86_64_DSO_SOURCES
+   xray_trampoline_x86_64.S
+   )
+
+
 set(arm_SOURCES
   xray_arm.cpp
   xray_trampoline_arm.S
@@ -128,10 +137,12 @@ set(XRAY_IMPL_HEADERS
 # consumption by tests.
 set(XRAY_ALL_SOURCE_FILES
   ${XRAY_SOURCES}
+  ${XRAY_DSO_SOURCES}
   ${XRAY_FDR_MODE_SOURCES}
   ${XRAY_BASIC_MODE_SOURCES}
   ${XRAY_PROFILING_MODE_SOURCES}
   ${x86_64_SOURCES}
+  ${x86_64_DSO_SOURCES}
   ${arm_SOURCES}
   ${armhf_SOURCES}
   ${hexagon_SOURCES}
@@ -162,6 +173,9 @@ set(XRAY_CFLAGS
   ${COMPILER_RT_CXX_CFLAGS})
 set(XRAY_COMMON_DEFINITIONS SANITIZER_COMMON_NO_REDEFINE_BUILTINS XRAY_HAS_EXCEPTIONS=1)
 
+# DSO trampolines need to be compiled with GOT addressing
+set(XRAY_COMMON_DEFINITIONS_DSO ${XRAY_COMMON_DEFINITIONS} XRAY_PIC)
+
 # Too many existing bugs, needs cleanup.
 append_list_if(COMPILER_RT_HAS_WNO_FORMAT -Wno-format XRAY_CFLAGS)
 
@@ -201,7 +215,16 @@ if (APPLE)
     CFLAGS ${XRAY_CFLAGS}
     DEFS ${XRAY_COMMON_DEFINITIONS}
     DEPS ${XRAY_DEPS})
+  add_compiler_rt_object_libraries(RTXrayDSO
+    OS ${XRAY_SUPPORTED_OS}
+    ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+    SOURCES ${XRAY_DSO_SOURCES}
+    ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+    CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+    DEPS ${XRAY_DEPS})
   set(XRAY_RTXRAY_ARCH_LIBS "")
+  set(XRAY_DSO_RTXRAY_ARCH_LIBS "")
   foreach(arch ${XRAY_SUPPORTED_ARCH})
     if(NOT ${arch} IN_LIST XRAY_SOURCE_ARCHS)
       continue()
@@ -215,6 +238,17 @@ if (APPLE)
       DEFS ${XRAY_COMMON_DEFINITIONS}
       DEPS ${XRAY_DEPS})
     list(APPEND XRAY_RTXRAY_ARCH_LIBS RTXray_${arch})
+    if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
+      add_compiler_rt_object_libraries(RTXrayDSO_${arch}
+        OS ${XRAY_SUPPORTED_OS}
+        ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+        SOURCES ${${arch}_DSO_SOURCES}
+        ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+        CFLAGS ${XRAY_CFLAGS}
+        DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+        DEPS ${XRAY_DEPS})
+      list(APPEND XRAY_DSO_RTXRAY_ARCH_LIBS RTXrayDSO_${arch})
+    endif()
   endforeach()
   add_compiler_rt_object_libraries(RTXrayFDR
     OS ${XRAY_SUPPORTED_OS}
@@ -252,6 +286,17 @@ if (APPLE)
     LINK_FLAGS ${XRAY_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
     LINK_LIBS ${XRAY_LINK_LIBS}
     PARENT_TARGET xray)
+  add_compiler_rt_runtime(clang_rt.xray-dso
+    STATIC
+    OS ${XRAY_SUPPORTED_OS}
+    ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+    OBJECT_LIBS RTXrayDSO ${XRAY_DSO_RTXRAY_ARCH_LIBS}
+    CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS}
+    LINK_FLAGS ${XRAY_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+    LINK_LIBS ${XRAY_LINK_LIBS}
+    PARENT_TARGET xray)
+
   add_compiler_rt_runtime(clang_rt.xray-fdr
     STATIC
     OS ${XRAY_SUPPORTED_OS}
@@ -346,16 +391,37 @@ else() # not Apple
       DEFS ${XRAY_COMMON_DEFINITIONS}
       OBJECT_LIBS RTXrayBASIC
       PARENT_TARGET xray)
-   # Profiler Mode runtime
-   add_compiler_rt_runtime(clang_rt.xray-profiling
-     STATIC
-     ARCHS ${arch}
-     CFLAGS ${XRAY_CFLAGS}
-     LINK_FLAGS ${XRAY_LINK_FLAGS}
-     LINK_LIBS ${XRAY_LINK_LIBS}
-     DEFS ${XRAY_COMMON_DEFINITIONS}
-     OBJECT_LIBS RTXrayPROFILING
-     PARENT_TARGET xray)
+    # Profiler Mode runtime
+    add_compiler_rt_runtime(clang_rt.xray-profiling
+      STATIC
+      ARCHS ${arch}
+      CFLAGS ${XRAY_CFLAGS}
+      LINK_FLAGS ${XRAY_LINK_FLAGS}
+      LINK_LIBS ${XRAY_LINK_LIBS}
+      DEFS ${XRAY_COMMON_DEFINITIONS}
+      OBJECT_LIBS RTXrayPROFILING
+      PARENT_TARGET xray)
+
+    if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
+      # TODO: Only implemented for X86 at the moment
+      add_compiler_rt_object_libraries(RTXrayDSO
+        ARCHS ${arch}
+        SOURCES ${XRAY_DSO_SOURCES} ${${arch}_DSO_SOURCES} 
+        ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+        CFLAGS ${XRAY_CFLAGS}
+        DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+        DEPS ${XRAY_DEPS})
+      # DSO runtime archive
+      add_compiler_rt_runtime(clang_rt.xray-dso
+        STATIC
+        ARCHS ${arch}
+        CFLAGS ${XRAY_CFLAGS}
+        LINK_FLAGS ${XRAY_LINK_FLAGS}
+        LINK_LIBS ${XRAY_LINK_LIBS}
+        DEFS ${XRAY_COMMON_DEFINITIONS}
+        OBJECT_LIBS RTXrayDSO
+        PARENT_TARGET xray)
+    endif()
   endforeach()
 endif() # not Apple
 
diff --git a/compiler-rt/lib/xray/xray_dso_init.cpp b/compiler-rt/lib/xray/xray_dso_init.cpp
new file mode 100644
index 00000000000000..eb754db54c64fa
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_dso_init.cpp
@@ -0,0 +1,62 @@
+//===-- xray_init.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic for DSOs.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+using namespace __sanitizer;
+
+extern "C" {
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+
+#if SANITIZER_APPLE
+// HACK: This is a temporary workaround to make XRay build on
+// Darwin, but it will probably not work at runtime.
+extern const XRaySledEntry __start_xray_instr_map[] = {};
+extern const XRaySledEntry __stop_xray_instr_map[] = {};
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {};
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {};
+#endif
+}
+
+// Handler functions to call in the patched entry/exit sled.
+extern atomic_uintptr_t XRayPatchedFunction;
+extern atomic_uintptr_t XRayArgLogger;
+extern atomic_uintptr_t XRayPatchedCustomEvent;
+extern atomic_uintptr_t XRayPatchedTypedEvent;
+
+static int __xray_object_id{-1};
+
+// Note: .preinit_array initialization does not work for DSOs
+__attribute__((constructor(0))) static void
+__xray_init_dso() XRAY_NEVER_INSTRUMENT {
+  // Register sleds in main XRay runtime.
+  __xray_object_id =
+      __xray_register_dso(__start_xray_instr_map, __stop_xray_instr_map,
+                          __start_xray_fn_idx, __stop_xray_fn_idx, {});
+}
+
+__attribute__((destructor(0))) static void
+__xray_finalize_dso() XRAY_NEVER_INSTRUMENT {
+  // Inform the main runtime that this DSO is no longer used.
+  __xray_deregister_dso(__xray_object_id);
+}
diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index f22a31b95686d0..53c93be89cd148 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -16,6 +16,8 @@
 #include <unistd.h>
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray_allocator.h"
 #include "xray_defs.h"
 #include "xray_flags.h"
 #include "xray_interface_internal.h"
@@ -28,7 +30,7 @@ extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
 extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
 
 #if SANITIZER_APPLE
-// HACK: This is a temporary workaround to make XRay build on 
+// HACK: This is a temporary workaround to make XRay build on
 // Darwin, but it will probably not work at runtime.
 const XRaySledEntry __start_xray_instr_map[] = {};
 extern const XRaySledEntry __stop_xray_instr_map[] = {};
@@ -43,14 +45,16 @@ using namespace __xray;
 // the weak symbols defined above (__start_xray_inst_map and
 // __stop_xray_instr_map) to initialise the instrumentation map that XRay uses
 // for runtime patching/unpatching of instrumentation points.
-//
-// FIXME: Support DSO instrumentation maps too. The current solution only works
-// for statically linked executables.
 atomic_uint8_t XRayInitialized{0};
 
 // This should always be updated before XRayInitialized is updated.
 SpinMutex XRayInstrMapMutex;
-XRaySledMap XRayInstrMap;
+
+//  Contains maps for the main executable as well as DSOs.
+XRaySledMap *XRayInstrMaps;
+
+// Number of binary objects registered.
+atomic_uint32_t XRayNumObjects{0};
 
 // Global flag to determine whether the flags have been initialized.
 atomic_uint8_t XRayFlagsInitialized{0};
@@ -58,6 +62,63 @@ atomic_uint8_t XRayFlagsInitialized{0};
 // A mutex to allow only one thread to initialize the XRay data structures.
 SpinMutex XRayInitMutex;
 
+// Registers XRay sleds and trampolines coming from the main executable or one
+// of the linked DSOs.
+// Returns the object ID if registration is successful, -1 otherwise.
+int32_t
+__xray_register_sleds(const XRaySledEntry *SledsBegin,
+                      const XRaySledEntry *SledsEnd,
+                      const XRayFunctionSledIndex *FnIndexBegin,
+                      const XRayFunctionSledIndex *FnIndexEnd, bool FromDSO,
+                      XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
+  if (!SledsBegin || !SledsEnd) {
+    Report("Invalid XRay sleds.\n");
+    return -1;
+  }
+  XRaySledMap SledMap;
+  SledMap.FromDSO = FromDSO;
+  SledMap.Loaded = true;
+  SledMap.Trampolines = Trampolines;
+  SledMap.Sleds = SledsBegin;
+  SledMap.Entries = SledsEnd - SledsBegin;
+  if (FnIndexBegin != nullptr) {
+    SledMap.SledsIndex = FnIndexBegin;
+    SledMap.Functions = FnIndexEnd - FnIndexBegin;
+  } else {
+    size_t CountFunctions = 0;
+    uint64_t LastFnAddr = 0;
+
+    for (std::size_t I = 0; I < SledMap.Entries; I++) {
+      const auto &Sled = SledMap.Sleds[I];
+      const auto Function = Sled.function();
+      if (Function != LastFnAddr) {
+        CountFunctions++;
+        LastFnAddr = Function;
+      }
+    }
+    SledMap.SledsIndex = nullptr;
+    SledMap.Functions = CountFunctions;
+  }
+  if (SledMap.Functions >= XRayMaxFunctions) {
+    Report("Too many functions! Maximum is %ld\n", XRayMaxFunctions);
+    return -1;
+  }
+
+  if (Verbosity())
+    Report("Registering %d new functions!\n", SledMap.Functions);
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    auto Idx = atomic_fetch_add(&XRayNumObjects, 1, memory_order_acq_rel);
+    if (Idx >= XRayMaxObjects) {
+      Report("Too many objects registered! Maximum is %ld\n", XRayMaxObjects);
+      return -1;
+    }
+    XRayInstrMaps[Idx] = std::move(SledMap);
+    return Idx;
+  }
+}
+
 // __xray_init() will do the actual loading of the current process' memory map
 // and then proceed to look for the .xray_instr_map section/segment.
 void __xray_init() XRAY_NEVER_INSTRUMENT {
@@ -80,29 +141,21 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
     return;
   }
 
-  {
-    SpinMutexLock Guard(&XRayInstrMapMutex);
-    XRayInstrMap.Sleds = __start_xray_instr_map;
-    XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
-    if (__start_xray_fn_idx != nullptr) {
-      XRayInstrMap.SledsIndex = __start_xray_fn_idx;
-      XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx;
-    } else {
-      size_t CountFunctions = 0;
-      uint64_t LastFnAddr = 0;
-
-      for (std::size_t I = 0; I < XRayInstrMap.Entries; I++) {
-        const auto &Sled = XRayInstrMap.Sleds[I];
-        const auto Function = Sled.function();
-        if (Function != LastFnAddr) {
-          CountFunctions++;
-          LastFnAddr = Function;
-        }
-      }
+  atomic_store(&XRayNumObjects, 0, memory_order_release);
 
-      XRayInstrMap.Functions = CountFunctions;
-    }
+  // Pre-allocation takes up approx. 5kB for XRayMaxObjects=64.
+  XRayInstrMaps = allocateBuffer<XRaySledMap>(XRayMaxObjects);
+
+  int MainBinaryId =
+      __xray_register_sleds(__start_xray_instr_map, __stop_xray_instr_map,
+                            __start_xray_fn_idx, __stop_xray_fn_idx, false, {});
+
+  // The executable should always get ID 0.
+  if (MainBinaryId != 0) {
+    Report("Registering XRay sleds failed.\n");
+    return;
   }
+
   atomic_store(&XRayInitialized, true, memory_order_release);
 
 #ifndef XRAY_NO_PREINIT
@@ -111,6 +164,84 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
 #endif
 }
 
+// Registers XRay sleds and trampolines of an instrumented DSO.
+// Returns the object ID if registration is successful, -1 otherwise.
+//
+// Default visibility is hidden, so we have to explicitly make it visible to
+// DSO.
+SANITIZER_INTERFACE_ATTRIBUTE int32_t __xray_register_dso(
+    const XRaySledEntry *SledsBegin, const XRaySledEntry *SledsEnd,
+    const XRayFunctionSledIndex *FnIndexBegin,
+    const XRayFunctionSledIndex *FnIndexEnd,
+    XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
+  // Make sure XRay has been initialized in the main executable.
+  __xray_init();
+
+  if (__xray_num_objects() == 0) {
+    if (Verbosity())
+      Report("No XRay instrumentation map in main executable. Not initializing "
+             "XRay for DSO.\n");
+    return -1;
+  }
+
+  // Register sleds in global map.
+  int ObjId = __xray_register_sleds(SledsBegin, SledsEnd, FnIndexBegin,
+                                    FnIndexEnd, true, Trampolines);
+
+#ifndef XRAY_NO_PREINIT
+  if (ObjId >= 0 && flags()->patch_premain)
+    __xray_patch_object(ObjId);
+#endif
+
+  return ObjId;
+}
+
+// Deregisters a DSO from the main XRay runtime.
+// Called from the DSO-local runtime when the library is unloaded (e.g. if
+// dlclose is called).
+// Returns true if the object ID is valid and the DSO was successfully
+// deregistered.
+SANITIZER_INTERFACE_ATTRIBUTE bool
+__xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+
+  if (!atomic_load(&XRayInitialized, memory_order_acquire)) {
+    if (Verbosity())
+      Report("XRay has not been initialized. Cannot deregister DSO.\n");
+    return false;
+  }
+
+  if (ObjId <= 0 || ObjId >= __xray_num_objects()) {
+    if (Verbosity())
+      Report("Can't deregister object with ID %d: ID is invalid.\n", ObjId);
+    return false;
+  }
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    auto &Entry = XRayInstrMaps[ObjId];
+    if (!Entry.FromDSO) {
+      if (Verbosity())
+        Report("Can't deregister object with ID %d: object does not correspond "
+               "to a shared library.\n",
+               ObjId);
+      return false;
+    }
+    if (!Entry.Loaded) {
+      if (Verbosity())
+        Report("Can't deregister object with ID %d: object is not loaded.\n",
+               ObjId);
+      return true;
+    }
+    // Mark DSO as unloaded. No need to unpatch.
+    Entry.Loaded = false;
+  }
+
+  if (Verbosity())
+    Report("Deregistered object with ID %d.\n", ObjId);
+
+  return true;
+}
+
 // FIXME: Make check-xray tests work on FreeBSD without
 // SANITIZER_CAN_USE_PREINIT_ARRAY.
 // See sanitizer_internal_defs.h where the macro is defined.
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 5839043fcb93a8..16e60bfc22cd10 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "xray_interface_internal.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #include <cinttypes>
 #include <cstdio>
@@ -36,7 +37,8 @@
 
 extern __sanitizer::SpinMutex XRayInstrMapMutex;
 extern __sanitizer::atomic_uint8_t XRayInitialized;
-extern __xray::XRaySledMap XRayInstrMap;
+extern __xray::XRaySledMap *XRayInstrMaps;
+extern __sanitizer::atomic_uint32_t XRayNumObjects;
 
 namespace __xray {
 
@@ -61,16 +63,16 @@ static const int16_t cSledLength = 20;
 #endif /* CPU architecture */
 
 // This is the function to call when we encounter the entry or exit sleds.
-atomic_uintptr_t XRayPatchedFunction{0};
+atomic_uintptr_t XRayPatchedFunction SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call from the arg1-enabled sleds/trampolines.
-atomic_uintptr_t XRayArgLogger{0};
+atomic_uintptr_t XRayArgLogger SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call when we encounter a custom event log call.
-atomic_uintptr_t XRayPatchedCustomEvent{0};
+atomic_uintptr_t XRayPatchedCustomEvent SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call when we encounter a typed event log call.
-atomic_uintptr_t XRayPatchedTypedEvent{0};
+atomic_uintptr_t XRayPatchedTypedEvent SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the global status to determine whether we are currently
 // patching/unpatching.
@@ -150,27 +152,42 @@ class MProtectHelper {
 
 namespace {
 
-bool patchSled(const XRaySledEntry &Sled, bool Enable,
-               int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+bool isObjectLoaded(int32_t ObjId) {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (ObjId < 0 ||
+      ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+    return false;
+  }
+  return XRayInstrMaps[ObjId].Loaded;
+}
+
+bool patchSled(const XRaySledEntry &Sled, bool Enable, int32_t FuncId,
+               const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   bool Success = false;
   switch (Sled.Kind) {
   case XRayEntryType::ENTRY:
-    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+    Success =
+        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.EntryTrampoline);
     break;
   case XRayEntryType::EXIT:
-    Success = patchFunctionExit(Enable, FuncId, Sled);
+    Success =
+        patchFunctionExit(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
     break;
   case XRayEntryType::TAIL:
-    Success = patchFunctionTailExit(Enable, FuncId, Sled);
+    Success = patchFunctionTailExit(Enable, FuncId, Sled,
+                                    Trampolines.TailExitTrampoline);
     break;
   case XRayEntryType::LOG_ARGS_ENTRY:
-    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+    Success =
+        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.LogArgsTrampoline);
     break;
   case XRayEntryType::CUSTOM_EVENT:
-    Success = patchCustomEvent(Enable, FuncId, Sled);
+    Success = patchCustomEvent(Enable, FuncId, Sled,
+                               Trampolines.CustomEventTrampoline);
     break;
   case XRayEntryType::TYPED_EVENT:
-    Success = patchTypedEvent(Enable, FuncId, Sled);
+    Success =
+        patchTypedEvent(Enable, FuncId, Sled, Trampolines.TypedEventTrampoline);
     break;
   default:
     Report("Unsupported sled kind '%" PRIu64 "' @%04x\n", Sled.Address,
@@ -205,10 +222,9 @@ findFunctionSleds(int32_t FuncId,
   return Index;
 }
 
-XRayPatchingStatus patchFunction(int32_t FuncId,
+XRayPatchingStatus patchFunction(int32_t FuncId, int32_t ObjId,
                                  bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
 
   uint8_t NotPatching = false;
@@ -220,13 +236,24 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch function: invalid sled map index: %d", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
 
   // If we don't have an index, we can't patch individual functions.
   if (InstrMap.Functions == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Invalid function id provided: %d\n", FuncId);
+    return XRayPatchingStatus::NOT_INITIALIZED;
+  }
+
   // FuncId must be a positive number, less than the number of functions
   // instrumented.
   if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
@@ -234,6 +261,8 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
     return XRayPatchingStatus::FAILED;
   }
 
+  auto PackedId = __xray::MakePackedId(FuncId, ObjId);
+
   // Now we patch ths sleds for this specific function.
   XRayFunctionSledIndex SledRange;
   if (InstrMap.SledsIndex) {
@@ -242,13 +271,13 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
   } else {
     SledRange = findFunctionSleds(FuncId, InstrMap);
   }
+
   auto *f = SledRange.Begin;
   bool SucceedOnce = false;
   for (size_t i = 0; i != SledRange.Size; ++i)
-    SucceedOnce |= patchSled(f[i], Enable, FuncId);
+    SucceedOnce |= patchSled(f[i], Enable, PackedId, InstrMap.Trampolines);
 
-  atomic_store(&XRayPatching, false,
-                            memory_order_release);
+  atomic_store(&XRayPatching, false, memory_order_release);
 
   if (!SucceedOnce) {
     Report("Failed patching any sled for function '%d'.", FuncId);
@@ -261,32 +290,31 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
 // controlPatching implements the common internals of the patching/unpatching
 // implementation. |Enable| defines whether we're enabling or disabling the
 // runtime XRay instrumentation.
-XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
-    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
-
-  uint8_t NotPatching = false;
-  if (!atomic_compare_exchange_strong(
-          &XRayPatching, &NotPatching, true, memory_order_acq_rel))
-    return XRayPatchingStatus::ONGOING; // Already patching.
-
-  uint8_t PatchingSuccess = false;
-  auto XRayPatchingStatusResetter =
-      at_scope_exit([&PatchingSuccess] {
-        if (!PatchingSuccess)
-          atomic_store(&XRayPatching, false,
-                                    memory_order_release);
-      });
-
+// This function should only be called after ensuring that XRay is initialized
+// and no other thread is currently patching.
+XRayPatchingStatus controlPatchingObjectUnchecked(bool Enable, int32_t ObjId) {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch functions: invalid sled map index: %d\n", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
   if (InstrMap.Entries == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
+  if (Verbosity())
+    Report("Patching object %d with %d functions.\n", ObjId, InstrMap.Entries);
+
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Object is not loaded at index: %d\n", ObjId);
+    return XRayPatchingStatus::FAILED;
+  }
+
   uint32_t FuncId = 1;
   uint64_t CurFun = 0;
 
@@ -336,20 +364,96 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
       ++FuncId;
       CurFun = F;
     }
-    patchSled(Sled, Enable, FuncId);
+    auto PackedId = __xray::MakePackedId(FuncId, ObjId);
+    patchSled(Sled, Enable, PackedId, InstrMap.Trampolines);
   }
-  atomic_store(&XRayPatching, false,
-                            memory_order_release);
-  PatchingSuccess = true;
+  atomic_store(&XRayPatching, false, memory_order_release);
   return XRayPatchingStatus::SUCCESS;
 }
 
-XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
+// Controls patching for all registered objects.
+// Returns: SUCCESS, if patching succeeds for all objects.
+//          NOT_INITIALIZED, if one or more objects returned NOT_INITIALIZED
+//             but none failed.
+//          FAILED, if patching of one or more objects failed.
+XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(&XRayPatching, &NotPatching, true,
+                                      memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  auto XRayPatchingStatusResetter = at_scope_exit(
+      [] { atomic_store(&XRayPatching, false, memory_order_release); });
+
+  unsigned NumObjects = __xray_num_objects();
+
+  XRayPatchingStatus CombinedStatus{NOT_INITIALIZED};
+  for (unsigned I = 0; I < NumObjects; ++I) {
+    if (!isObjectLoaded(I))
+      continue;
+    auto LastStatus = controlPatchingObjectUnchecked(Enable, I);
+    switch (LastStatus) {
+    case SUCCESS:
+      if (CombinedStatus == NOT_INITIALIZED)
+        CombinedStatus = SUCCESS;
+      break;
+    case FAILED:
+      // Report failure, but try to patch the remaining objects
+      CombinedStatus = FAILED;
+      break;
+    case NOT_INITIALIZED:
+      // XRay has been initialized but there are no sleds available for this
+      // object. Try to patch remaining objects.
+      if (CombinedStatus != FAILED)
+        CombinedStatus = NOT_INITIALIZED;
+      break;
+    case ONGOING:
+      llvm_unreachable("Status ONGOING should not appear at this point");
+    default:
+      llvm_unreachable("Unhandled patching status");
+    }
+  }
+  return CombinedStatus;
+}
+
+// Controls patching for one object.
+XRayPatchingStatus controlPatching(bool Enable,
+                                   int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(&XRayPatching, &NotPatching, true,
+                                      memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  auto XRayPatchingStatusResetter = at_scope_exit(
+      [] { atomic_store(&XRayPatching, false, memory_order_release); });
+
+  return controlPatchingObjectUnchecked(Enable, ObjId);
+}
+
+XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, int32_t ObjId,
                                             bool Enable) XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch function: invalid sled map index: %d\n", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
+  }
+
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Object is not loaded at index: %d\n", ObjId);
+    return XRayPatchingStatus::FAILED;
   }
 
   // FuncId must be a positive number, less than the number of functions
@@ -398,7 +502,7 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
     Report("Failed mprotect: %d\n", errno);
     return XRayPatchingStatus::FAILED;
   }
-  return patchFunction(FuncId, Enable);
+  return patchFunction(FuncId, ObjId, Enable);
 }
 
 } // namespace
@@ -412,12 +516,10 @@ using namespace __xray;
 
 int __xray_set_handler(void (*entry)(int32_t,
                                      XRayEntryType)) XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
 
     atomic_store(&__xray::XRayPatchedFunction,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -425,11 +527,9 @@ int __xray_set_handler(void (*entry)(int32_t,
 
 int __xray_set_customevent_handler(void (*entry)(void *, size_t))
     XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
     atomic_store(&__xray::XRayPatchedCustomEvent,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -437,11 +537,9 @@ int __xray_set_customevent_handler(void (*entry)(void *, size_t))
 
 int __xray_set_typedevent_handler(void (*entry)(size_t, const void *,
                                                 size_t)) XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
     atomic_store(&__xray::XRayPatchedTypedEvent,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -474,39 +572,78 @@ XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
   return controlPatching(true);
 }
 
+XRayPatchingStatus __xray_patch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return controlPatching(true, ObjId);
+}
+
 XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
   return controlPatching(false);
 }
 
+XRayPatchingStatus __xray_unpatch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return controlPatching(false, ObjId);
+}
+
 XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
-  return mprotectAndPatchFunction(FuncId, true);
+  auto Ids = __xray::UnpackId(FuncId);
+  auto ObjId = Ids.first;
+  auto FnId = Ids.second;
+  return mprotectAndPatchFunction(FnId, ObjId, true);
+}
+
+XRayPatchingStatus
+__xray_patch_function_in_object(int32_t FuncId,
+                                int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return mprotectAndPatchFunction(FuncId, ObjId, true);
 }
 
 XRayPatchingStatus
 __xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
-  return mprotectAndPatchFunction(FuncId, false);
+  auto Ids = __xray::UnpackId(FuncId);
+  auto ObjId = Ids.first;
+  auto FnId = Ids.second;
+  return mprotectAndPatchFunction(FnId, ObjId, false);
+}
+
+XRayPatchingStatus
+__xray_unpatch_function_in_object(int32_t FuncId,
+                                  int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return mprotectAndPatchFunction(FuncId, ObjId, false);
 }
 
 int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return 0;
 
   // A relaxed write might not be visible even if the current thread gets
   // scheduled on a different CPU/NUMA node.  We need to wait for everyone to
   // have this handler installed for consistency of collected data across CPUs.
   atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry),
-                            memory_order_release);
+               memory_order_release);
   return 1;
 }
 
 int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
 
-uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+uintptr_t
+__xray_function_address(int32_t CombinedFuncId) XRAY_NEVER_INSTRUMENT {
+  auto Ids = __xray::UnpackId(CombinedFuncId);
+  return __xray_function_address_in_object(Ids.second, Ids.first);
+}
+
+uintptr_t __xray_function_address_in_object(int32_t FuncId, int32_t ObjId)
+    XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    auto count = atomic_load(&XRayNumObjects, memory_order_acquire);
+    if (ObjId < 0 || ObjId >= count) {
+      Report("Unable to determine function address: invalid sled map index %d "
+             "(size is %d)\n",
+             ObjId, (int)count);
+      return 0;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
 
   if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions)
@@ -525,6 +662,29 @@ uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
 }
 
 size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
+  return __xray_max_function_id_in_object(0);
+}
+
+size_t __xray_max_function_id_in_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (ObjId < 0 || ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire))
+    return 0;
+  return XRayInstrMaps[ObjId].Functions;
+}
+
+size_t __xray_num_objects() XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Guard(&XRayInstrMapMutex);
-  return XRayInstrMap.Functions;
+  return atomic_load(&XRayNumObjects, memory_order_acquire);
+}
+
+int32_t __xray_unpack_function_id(int32_t PackedId) {
+  return __xray::UnpackId(PackedId).second;
+}
+
+int32_t __xray_unpack_object_id(int32_t PackedId) {
+  return __xray::UnpackId(PackedId).first;
+}
+
+int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId) {
+  return __xray::MakePackedId(FuncId, ObjId);
 }
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index 80c07c167f6461..5fbaa9c3f315b1 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -18,6 +18,18 @@
 #include "xray/xray_interface.h"
 #include <cstddef>
 #include <cstdint>
+#include <utility>
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+extern void __xray_FunctionTailExit();
+extern void __xray_ArgLoggerEntry();
+extern void __xray_CustomEvent();
+extern void __xray_TypedEvent();
+}
 
 extern "C" {
 
@@ -67,36 +79,77 @@ struct XRayFunctionSledIndex {
                                                    uintptr_t(Begin));
   }
 };
+
+struct XRayTrampolines {
+  void (*EntryTrampoline)();
+  void (*ExitTrampoline)();
+  void (*TailExitTrampoline)();
+  void (*LogArgsTrampoline)();
+  void (*CustomEventTrampoline)();
+  void (*TypedEventTrampoline)();
+
+  XRayTrampolines() {
+    // These resolve to the definitions in the respective executable or DSO.
+    EntryTrampoline = __xray_FunctionEntry;
+    ExitTrampoline = __xray_FunctionExit;
+    TailExitTrampoline = __xray_FunctionTailExit;
+    LogArgsTrampoline = __xray_ArgLoggerEntry;
+    CustomEventTrampoline = __xray_CustomEvent;
+    TypedEventTrampoline = __xray_TypedEvent;
+  }
+};
+
+extern int32_t __xray_register_dso(const XRaySledEntry *SledsBegin,
+                                   const XRaySledEntry *SledsEnd,
+                                   const XRayFunctionSledIndex *FnIndexBegin,
+                                   const XRayFunctionSledIndex *FnIndexEnd,
+                                   XRayTrampolines Trampolines);
+
+extern bool __xray_deregister_dso(int32_t ObjId);
 }
 
 namespace __xray {
 
+constexpr uint32_t XRayNFnBits = 24;
+constexpr uint32_t XRayNObjBits = 8;
+
+constexpr uint32_t XRayFnBitMask = 0x00FFFFFF;
+constexpr uint32_t XRayObjBitMask = 0xFF000000;
+
+constexpr size_t XRayMaxFunctions = 1 << XRayNFnBits;
+constexpr size_t XRayMaxObjects = 1 << XRayNObjBits;
+
+inline int32_t MakePackedId(int32_t FnId, int32_t ObjId) {
+  return ((ObjId << XRayNFnBits) & XRayObjBitMask) | (FnId & XRayFnBitMask);
+}
+
+inline std::pair<int32_t, int32_t> UnpackId(int32_t PackedId) {
+  uint32_t ObjId = (PackedId & XRayObjBitMask) >> XRayNFnBits;
+  uint32_t FnId = PackedId & XRayFnBitMask;
+  return {ObjId, FnId};
+}
+
 struct XRaySledMap {
   const XRaySledEntry *Sleds;
   size_t Entries;
   const XRayFunctionSledIndex *SledsIndex;
   size_t Functions;
+  XRayTrampolines Trampolines;
+  bool FromDSO;
+  bool Loaded;
 };
 
 bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
                         void (*Trampoline)());
-bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                       void (*Trampoline)());
 bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
-                           const XRaySledEntry &Sled);
-bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
-bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+                           const XRaySledEntry &Sled, void (*Trampoline)());
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                      void (*Trampoline)());
+bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                     void (*Trampoline)());
 
 } // namespace __xray
 
-extern "C" {
-// The following functions have to be defined in assembler, on a per-platform
-// basis. See xray_trampoline_*.S files for implementations.
-extern void __xray_FunctionEntry();
-extern void __xray_FunctionExit();
-extern void __xray_FunctionTailExit();
-extern void __xray_ArgLoggerEntry();
-extern void __xray_CustomEvent();
-extern void __xray_TypedEvent();
-}
-
 #endif
diff --git a/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
index 01098f60eeab8b..0f480547b52cc6 100644
--- a/compiler-rt/lib/xray/xray_trampoline_x86_64.S
+++ b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -107,6 +107,16 @@
 	.section __TEXT,__text
 #endif
 
+.macro LOAD_HANDLER_ADDR handler
+#if !defined(XRAY_PIC)
+	movq	ASM_SYMBOL(\handler)(%rip), %rax
+#else
+	movq	ASM_SYMBOL(\handler)@GOTPCREL(%rip), %rax
+	movq	(%rax), %rax
+#endif
+.endm
+
+
 //===----------------------------------------------------------------------===//
 
 	.globl ASM_SYMBOL(__xray_FunctionEntry)
@@ -121,7 +131,7 @@ ASM_SYMBOL(__xray_FunctionEntry):
 
 	// This load has to be atomic, it's concurrent with __xray_patch().
 	// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq	%rax, %rax
 	je	LOCAL_LABEL(tmp0)
 
@@ -159,7 +169,7 @@ ASM_SYMBOL(__xray_FunctionExit):
 	movupd	%xmm1, 16(%rsp)
 	movq	%rax, 8(%rsp)
 	movq	%rdx, 0(%rsp)
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq %rax,%rax
 	je	LOCAL_LABEL(tmp2)
 
@@ -195,7 +205,7 @@ ASM_SYMBOL(__xray_FunctionTailExit):
 	SAVE_REGISTERS
 	ALIGN_STACK_16B
 
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq %rax,%rax
 	je	LOCAL_LABEL(tmp4)
 
@@ -224,12 +234,12 @@ ASM_SYMBOL(__xray_ArgLoggerEntry):
 	ALIGN_STACK_16B
 
 	// Again, these function pointer loads must be atomic; MOV is fine.
-	movq	ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray13XRayArgLoggerE
 	testq	%rax, %rax
 	jne	LOCAL_LABEL(arg1entryLog)
 
 	// If [arg1 logging handler] not set, defer to no-arg logging.
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq	%rax, %rax
 	je	LOCAL_LABEL(arg1entryFail)
 
@@ -268,7 +278,7 @@ ASM_SYMBOL(__xray_CustomEvent):
 
 	// We take two arguments to this trampoline, which should be in rdi	and rsi
 	// already.
-	movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray22XRayPatchedCustomEventE
 	testq %rax,%rax
 	je LOCAL_LABEL(customEventCleanup)
 
@@ -293,7 +303,7 @@ ASM_SYMBOL(__xray_TypedEvent):
 
 	// We pass three arguments to this trampoline, which should be in rdi, rsi
 	// and rdx without our intervention.
-	movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray21XRayPatchedTypedEventE
 	testq %rax,%rax
 	je LOCAL_LABEL(typedEventCleanup)
 
diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp
index b9666a40861d48..663a51b2686614 100644
--- a/compiler-rt/lib/xray/xray_x86_64.cpp
+++ b/compiler-rt/lib/xray/xray_x86_64.cpp
@@ -170,7 +170,8 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                       const XRaySledEntry &Sled,
+                       void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -192,11 +193,11 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
   // Prerequisite is to compute the relative offset fo the
   // __xray_FunctionExit function's address.
   const uint64_t Address = Sled.address();
-  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                              (static_cast<int64_t>(Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
-           reinterpret_cast<void *>(__xray_FunctionExit),
+           reinterpret_cast<void *>(Trampoline),
            reinterpret_cast<void *>(Address));
     return false;
   }
@@ -217,16 +218,16 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                           const XRaySledEntry &Sled,
+                           void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the tail call sled with a similar
   // sequence as the entry sled, but calls the tail exit sled instead.
   const uint64_t Address = Sled.address();
-  int64_t TrampolineOffset =
-      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
-      (static_cast<int64_t>(Address) + 11);
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
+                             (static_cast<int64_t>(Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
-           reinterpret_cast<void *>(__xray_FunctionTailExit),
+           reinterpret_cast<void *>(Trampoline),
            reinterpret_cast<void *>(Address));
     return false;
   }
@@ -247,7 +248,8 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                      const XRaySledEntry &Sled,
+                      void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -275,7 +277,8 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                     const XRaySledEntry &Sled,
+                     void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
diff --git a/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
new file mode 100644
index 00000000000000..31c615bd1f81bf
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
@@ -0,0 +1,47 @@
+// Testing shared library support in basic logging mode.
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=false,xray_mode=xray-basic,xray_logfile_base=basic-mode-dso-,verbosity=1" XRAY_BASIC_OPTIONS="func_duration_threshold_us=0" %run %t/main.o 2>&1 | FileCheck %s
+// RUN: %llvm_xray account --format=csv --sort=funcid "`ls basic-mode-dso-* | head -1`" | FileCheck --check-prefix=ACCOUNT %s
+// RUN: rm basic-mode-dso-*
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <unistd.h>
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+  sleep(1);
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  // Explicit patching to ensure the DSO has been loaded
+  __xray_patch();
+  instrumented_in_executable();
+  // CHECK: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+#include <unistd.h>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
+
+// ACCOUNT: funcid,count,min,median,90%ile,99%ile,max,sum,debug,function
+// ACCOUNT-NEXT: 1,1,{{.*}}
+// ACCOUNT-NEXT: 16777217,1,{{.*}}
diff --git a/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp b/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
new file mode 100644
index 00000000000000..92f3c29e970d42
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
@@ -0,0 +1,14 @@
+// Test that the DSO-local runtime library has been linked if -fxray-shared is passed.
+//
+// RUN: %clangxx -fxray-instrument -fxray-shared %s -shared -o %t.so
+// RUN: llvm-nm %t.so | FileCheck %s --check-prefix ENABLED
+
+// RUN: %clangxx -fxray-instrument %s -shared -o %t.so
+// RUN: llvm-nm %t.so | FileCheck %s --check-prefix DISABLED
+//
+// REQUIRES: target=x86_64{{.*}}
+
+[[clang::xray_always_instrument]] int always_instrumented() { return 42; }
+
+// ENABLED: __start_xray_instr_map
+// DISABLED-NOT: __start_xray_instr_map
diff --git a/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
new file mode 100644
index 00000000000000..9db411d5ff1c6e
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
@@ -0,0 +1,107 @@
+// Check that we can patch and un-patch DSOs loaded with dlopen.
+//
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -rdynamic -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp -o %t/main.o
+//
+// RUN: XRAY_OPTIONS="patch_premain=true" %run %t/main.o %t/testlib.so 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <dlfcn.h>
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+typedef void (*dso_func_type)();
+
+int main(int argc, char **argv) {
+  if (argc < 2) {
+    printf("Shared library argument missing\n");
+    // CHECK-NOT: Shared library argument missing
+    return 1;
+  }
+
+  const char *dso_path = argv[1];
+
+  void *dso_handle = dlopen(dso_path, RTLD_LAZY);
+  if (!dso_handle) {
+    printf("Failed to load shared library\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+      return 1;
+    }
+    return 1;
+  }
+
+  dso_func_type instrumented_in_dso =
+      (dso_func_type)dlsym(dso_handle, "_Z19instrumented_in_dsov");
+  if (!instrumented_in_dso) {
+    printf("Failed to find symbol\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+      return 1;
+    }
+    return 1;
+  }
+
+  __xray_set_handler(test_handler);
+
+  instrumented_in_executable();
+  // CHECK: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+
+  auto status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+
+  status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+
+  instrumented_in_executable();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+
+  dlclose(dso_handle);
+
+  status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
diff --git a/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp b/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp
new file mode 100644
index 00000000000000..89da2764c35cee
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp
@@ -0,0 +1,197 @@
+// Check that loading libraries with different modes (RTLD_LOCAL/RTLD_GLOBAL)
+// and dependencies on other DSOs work correctly.
+//
+
+// RUN: split-file %s %t
+//
+// Build shared libs with dependencies b->c and e->f
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testliba.cpp -o %t/testliba.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibc.cpp -o %t/testlibc.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibb.cpp %t/testlibc.so -o %t/testlibb.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibd.cpp -o %t/testlibd.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibf.cpp -o %t/testlibf.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibe.cpp %t/testlibf.so -o %t/testlibe.so
+//
+// Executable links with a and b explicitly and loads d and e at runtime.
+// RUN: %clangxx_xray -g -fPIC -rdynamic -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testliba.so %t/testlibb.so -o %t/main.o
+//
+// RUN:  XRAY_OPTIONS="patch_premain=true" %run %t/main.o %t/testlibd.so %t/testlibe.so 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <dlfcn.h>
+
+[[clang::xray_never_instrument]] void test_handler(int32_t fid,
+                                                   XRayEntryType type) {
+  printf("called: %d, object=%d, fn=%d, type=%d\n", fid, (fid >> 24) & 0xFF,
+         fid & 0x00FFFFFF, static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+typedef void (*dso_func_type)();
+
+[[clang::xray_never_instrument]] void *load_dso(const char *path, int mode) {
+  void *dso_handle = dlopen(path, mode);
+  if (!dso_handle) {
+    printf("failed to load shared library\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+    }
+    return nullptr;
+  }
+  return dso_handle;
+}
+
+[[clang::xray_never_instrument]] void find_and_call(void *dso_handle,
+                                                    const char *fn) {
+  dso_func_type dso_fn = (dso_func_type)dlsym(dso_handle, fn);
+  if (!dso_fn) {
+    printf("failed to find symbol\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+    }
+    return;
+  }
+  dso_fn();
+}
+
+extern void a();
+extern void b();
+
+int main(int argc, char **argv) {
+
+  if (argc < 3) {
+    printf("Shared library arguments missing\n");
+    // CHECK-NOT: Shared library arguments missing
+    return 1;
+  }
+
+  const char *dso_path_d = argv[1];
+  const char *dso_path_e = argv[2];
+
+  __xray_set_handler(test_handler);
+
+  instrumented_in_executable();
+  // CHECK: called: {{[0-9]+}}, object=0, fn={{[0-9]+}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=0, fn={{[0-9]+}}, type=1
+
+  a();
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ1:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: a called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ1]], fn=1, type=1
+
+  // Make sure this object ID does not appear again
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ1]]
+
+  b(); // b calls c
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ2:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: b called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ3:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: c called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ3]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ3]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ2]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ2]]
+
+  // Now check explicit loading with RTLD_LOCAL
+
+  void *dso_handle_d = load_dso(dso_path_d, RTLD_LAZY | RTLD_LOCAL);
+  void *dso_handle_e = load_dso(dso_path_e, RTLD_LAZY | RTLD_LOCAL);
+  // CHECK-NOT: failed to load shared library
+
+  find_and_call(dso_handle_d, "_Z1dv");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ4:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: d called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ4]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ4]]
+
+  find_and_call(dso_handle_e, "_Z1ev");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ5:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: e called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ6:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: f called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ6]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ6]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ5]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ5]]
+
+  // Unload DSOs
+  dlclose(dso_handle_d);
+  dlclose(dso_handle_e);
+
+  // Repeat test with RTLD_GLOBAL
+  dso_handle_d = load_dso(dso_path_d, RTLD_LAZY | RTLD_GLOBAL);
+  dso_handle_e = load_dso(dso_path_e, RTLD_LAZY | RTLD_GLOBAL);
+  // CHECK-NOT: failed to load shared library
+
+  find_and_call(dso_handle_d, "_Z1dv");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ7:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: d called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ7]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ7]]
+
+  find_and_call(dso_handle_e, "_Z1ev");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ8:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: e called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ9:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: f called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ9]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ9]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ8]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ8]]
+
+  auto status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+
+  dlclose(dso_handle_d);
+  dlclose(dso_handle_e);
+}
+
+//--- libgenmacro.inc
+#include <cstdio>
+// Helper macros to quickly generate libraries containing a single function.
+#define GENERATE_LIB(NAME)                                                     \
+  [[clang::xray_always_instrument]] void NAME() { printf(#NAME " called\n"); }
+
+#define GENERATE_LIB_WITH_CALL(NAME, FN)                                       \
+  extern void FN();                                                            \
+  [[clang::xray_always_instrument]] void NAME() {                              \
+    printf(#NAME " called\n");                                                 \
+    FN();                                                                      \
+  }
+
+//--- testliba.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(a)
+
+//--- testlibb.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB_WITH_CALL(b, c)
+
+//--- testlibc.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(c)
+
+//--- testlibd.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(d)
+
+//--- testlibe.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB_WITH_CALL(e, f)
+
+//--- testlibf.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(f)
diff --git a/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
new file mode 100644
index 00000000000000..0708d0383439d0
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
@@ -0,0 +1,45 @@
+// Checking that DSOs are automatically patched upon load, if patch_premain is passed.
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=true,verbosity=1" %run %t/main.o 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  __xray_set_handler(test_handler);
+  instrumented_in_executable();
+  // CHECK: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
diff --git a/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp
new file mode 100644
index 00000000000000..d3e992dd497725
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp
@@ -0,0 +1,75 @@
+// Check that we can patch and un-patch on demand, and that logging gets invoked
+// appropriately.
+//
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=false" %run %t/main.o 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+bool called = false;
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+  called = true;
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  __xray_set_handler(test_handler);
+  instrumented_in_executable();
+  // CHECK: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK: instrumented_in_dso called
+  auto status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  instrumented_in_executable();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  status = __xray_unpatch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+  status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  __xray_remove_handler();
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+  status = __xray_unpatch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}

>From 90aa32cfaac5274d7f1604baa4d5316401385f4c Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <sebastian.kreutzer at tu-darmstadt.de>
Date: Fri, 18 Oct 2024 17:59:22 +0200
Subject: [PATCH 2/3] fixup! [XRay] Add DSO support for XRay instrumentation on
 X86_64

---
 clang/include/clang/Driver/XRayArgs.h         |  4 +-
 clang/lib/Driver/XRayArgs.cpp                 |  8 ++--
 compiler-rt/include/xray/xray_interface.h     | 40 +++++++++--------
 compiler-rt/lib/xray/xray_AArch64.cpp         | 19 +++++---
 compiler-rt/lib/xray/xray_arm.cpp             | 19 +++++---
 compiler-rt/lib/xray/xray_hexagon.cpp         | 19 +++++---
 compiler-rt/lib/xray/xray_init.cpp            |  2 +-
 compiler-rt/lib/xray/xray_interface.cpp       | 44 ++++++++-----------
 .../lib/xray/xray_interface_internal.h        | 17 +++----
 compiler-rt/lib/xray/xray_loongarch64.cpp     | 23 +++++++---
 compiler-rt/lib/xray/xray_mips.cpp            | 23 +++++++---
 compiler-rt/lib/xray/xray_mips64.cpp          | 23 +++++++---
 compiler-rt/lib/xray/xray_powerpc64.cpp       | 28 +++++++++---
 compiler-rt/lib/xray/xray_x86_64.cpp          | 25 ++++++-----
 14 files changed, 173 insertions(+), 121 deletions(-)

diff --git a/clang/include/clang/Driver/XRayArgs.h b/clang/include/clang/Driver/XRayArgs.h
index 8fbcf469e5bad1..1b5c4a4c42f12a 100644
--- a/clang/include/clang/Driver/XRayArgs.h
+++ b/clang/include/clang/Driver/XRayArgs.h
@@ -36,9 +36,7 @@ class XRayArgs {
                llvm::opt::ArgStringList &CmdArgs, types::ID InputType) const;
 
   bool needsXRayRt() const { return XRayInstrument && XRayRT; }
-  bool needsXRayDSORt() const {
-    return XRayInstrument && XRayRT && XRayShared;
-  }
+  bool needsXRayDSORt() const { return XRayInstrument && XRayRT && XRayShared; }
   llvm::ArrayRef<std::string> modeList() const { return Modes; }
   XRayInstrSet instrumentationBundle() const { return InstrumentationBundle; }
 };
diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp
index 411054e067cb42..d0bb5d4887c184 100644
--- a/clang/lib/Driver/XRayArgs.cpp
+++ b/clang/lib/Driver/XRayArgs.cpp
@@ -63,8 +63,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
         << XRayInstrument->getSpelling() << Triple.str();
   }
 
-  if (Args.hasFlag(options::OPT_fxray_shared,
-                   options::OPT_fno_xray_shared, false)) {
+  if (Args.hasFlag(options::OPT_fxray_shared, options::OPT_fno_xray_shared,
+                   false)) {
     XRayShared = true;
 
     // DSO instrumentation is currently limited to x86_64
@@ -75,8 +75,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
 
     unsigned PICLvl = std::get<1>(tools::ParsePICArgs(TC, Args));
     if (!PICLvl) {
-      D.Diag(diag::err_opt_not_valid_without_opt)
-          << "-fxray-shared" << "-fPIC";
+      D.Diag(diag::err_opt_not_valid_without_opt) << "-fxray-shared"
+                                                  << "-fPIC";
     }
   }
 
diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
index 717cfe292ce416..675ea0cbc48c83 100644
--- a/compiler-rt/include/xray/xray_interface.h
+++ b/compiler-rt/include/xray/xray_interface.h
@@ -93,8 +93,8 @@ enum XRayPatchingStatus {
   FAILED = 3,
 };
 
-/// This tells XRay to patch the instrumentation points in all currently loaded objects. See XRayPatchingStatus
-/// for possible result values.
+/// This tells XRay to patch the instrumentation points in all currently loaded
+/// objects. See XRayPatchingStatus for possible result values.
 extern XRayPatchingStatus __xray_patch();
 
 /// This tells XRay to patch the instrumentation points in the given object.
@@ -105,8 +105,8 @@ extern XRayPatchingStatus __xray_patch_object(int32_t ObjId);
 /// result values.
 extern XRayPatchingStatus __xray_unpatch();
 
-/// Reverses the effect of __xray_patch_object. See XRayPatchingStatus for possible
-/// result values.
+/// Reverses the effect of __xray_patch_object. See XRayPatchingStatus for
+/// possible result values.
 extern XRayPatchingStatus __xray_unpatch_object(int32_t ObjId);
 
 /// This unpacks the given (packed) function id and patches
@@ -114,8 +114,8 @@ extern XRayPatchingStatus __xray_unpatch_object(int32_t ObjId);
 /// result values.
 extern XRayPatchingStatus __xray_patch_function(int32_t FuncId);
 
-/// This patches a specific function in the given object. See XRayPatchingStatus for possible
-/// result values.
+/// This patches a specific function in the given object. See XRayPatchingStatus
+/// for possible result values.
 extern XRayPatchingStatus __xray_patch_function_in_object(int32_t FuncId,
                                                           int32_t ObjId);
 
@@ -129,26 +129,29 @@ extern XRayPatchingStatus __xray_unpatch_function(int32_t FuncId);
 extern XRayPatchingStatus __xray_unpatch_function_in_object(int32_t FuncId,
                                                             int32_t ObjId);
 
-/// This function unpacks the given (packed) function id and returns the address of the corresponding function. We return 0 if we encounter any error, even if 0 may be a valid function
-/// address.
+/// This function unpacks the given (packed) function id and returns the address
+/// of the corresponding function. We return 0 if we encounter any error, even
+/// if 0 may be a valid function address.
 extern uintptr_t __xray_function_address(int32_t FuncId);
 
-/// This function returns the address of the function in the given object provided valid function and object
-/// ids. We return 0 if we encounter any error, even if 0 may be a valid function
-/// address.
+/// This function returns the address of the function in the given object
+/// provided valid function and object ids. We return 0 if we encounter any
+/// error, even if 0 may be a valid function address.
 extern uintptr_t __xray_function_address_in_object(int32_t FuncId,
                                                    int32_t ObjId);
 
-/// This function returns the maximum valid function id for the main executable (object id = 0). Returns 0 if we
-/// encounter errors (when there are no instrumented functions, etc.).
+/// This function returns the maximum valid function id for the main executable
+/// (object id = 0). Returns 0 if we encounter errors (when there are no
+/// instrumented functions, etc.).
 extern size_t __xray_max_function_id();
 
-/// This function returns the maximum valid function id for the given object. Returns 0 if we
-/// encounter errors (when there are no instrumented functions, etc.).
+/// This function returns the maximum valid function id for the given object.
+/// Returns 0 if we encounter errors (when there are no instrumented functions,
+/// etc.).
 extern size_t __xray_max_function_id_in_object(int32_t ObjId);
 
-/// This function returns the number of previously registered objects (executable + loaded DSOs).
-/// Returns 0 if XRay has not been initialized.
+/// This function returns the number of previously registered objects
+/// (executable + loaded DSOs). Returns 0 if XRay has not been initialized.
 extern size_t __xray_num_objects();
 
 /// Unpacks the function id from the given packed id.
@@ -158,7 +161,8 @@ extern int32_t __xray_unpack_function_id(int32_t PackedId);
 extern int32_t __xray_unpack_object_id(int32_t PackedId);
 
 /// Creates and returns a packed id from the given function and object ids.
-/// If the ids do not fit within the reserved number of bits for each part, the high bits are truncated.
+/// If the ids do not fit within the reserved number of bits for each part, the
+/// high bits are truncated.
 extern int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId);
 
 /// Initialize the required XRay data structures. This is useful in cases where
diff --git a/compiler-rt/lib/xray/xray_AArch64.cpp b/compiler-rt/lib/xray/xray_AArch64.cpp
index c1d77758946edf..2b151162ce4b9b 100644
--- a/compiler-rt/lib/xray/xray_AArch64.cpp
+++ b/compiler-rt/lib/xray/xray_AArch64.cpp
@@ -89,18 +89,23 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                         const XRaySledEntry &Sled,
-                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                        const XRayTrampolines &Trampolines,
+                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
+  auto Trampoline =
+      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
   return patchSled(Enable, FuncId, Sled, Trampoline);
 }
 
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+bool patchFunctionExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
+bool patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.TailExitTrampoline);
 }
 
 // AArch64AsmPrinter::LowerPATCHABLE_EVENT_CALL generates this code sequence:
diff --git a/compiler-rt/lib/xray/xray_arm.cpp b/compiler-rt/lib/xray/xray_arm.cpp
index e1818555906c35..e318bb7070e802 100644
--- a/compiler-rt/lib/xray/xray_arm.cpp
+++ b/compiler-rt/lib/xray/xray_arm.cpp
@@ -128,18 +128,23 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                         const XRaySledEntry &Sled,
-                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                        const XRayTrampolines &Trampolines,
+                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
+  auto Trampoline =
+      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
   return patchSled(Enable, FuncId, Sled, Trampoline);
 }
 
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+bool patchFunctionExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
+bool patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.TailExitTrampoline);
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
diff --git a/compiler-rt/lib/xray/xray_hexagon.cpp b/compiler-rt/lib/xray/xray_hexagon.cpp
index 7f127b2b499cd2..1c07bb04690368 100644
--- a/compiler-rt/lib/xray/xray_hexagon.cpp
+++ b/compiler-rt/lib/xray/xray_hexagon.cpp
@@ -135,18 +135,23 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                         const XRaySledEntry &Sled,
-                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                        const XRayTrampolines &Trampolines,
+                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
+  auto Trampoline =
+      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
   return patchSled(Enable, FuncId, Sled, Trampoline);
 }
 
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+bool patchFunctionExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+bool patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.TailExitTrampoline);
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index 53c93be89cd148..020bfe52b53207 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -210,7 +210,7 @@ __xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
     return false;
   }
 
-  if (ObjId <= 0 || ObjId >= __xray_num_objects()) {
+  if (ObjId <= 0 || static_cast<uint32_t>(ObjId) >= __xray_num_objects()) {
     if (Verbosity())
       Report("Can't deregister object with ID %d: ID is invalid.\n", ObjId);
     return false;
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 16e60bfc22cd10..d1f732a1786443 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "xray_interface_internal.h"
-#include "llvm/Support/ErrorHandling.h"
 
 #include <cinttypes>
 #include <cstdio>
@@ -154,8 +153,8 @@ namespace {
 
 bool isObjectLoaded(int32_t ObjId) {
   SpinMutexLock Guard(&XRayInstrMapMutex);
-  if (ObjId < 0 ||
-      ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+  if (ObjId < 0 || static_cast<uint32_t>(ObjId) >=
+                       atomic_load(&XRayNumObjects, memory_order_acquire)) {
     return false;
   }
   return XRayInstrMaps[ObjId].Loaded;
@@ -166,28 +165,22 @@ bool patchSled(const XRaySledEntry &Sled, bool Enable, int32_t FuncId,
   bool Success = false;
   switch (Sled.Kind) {
   case XRayEntryType::ENTRY:
-    Success =
-        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.EntryTrampoline);
+    Success = patchFunctionEntry(Enable, FuncId, Sled, Trampolines, false);
     break;
   case XRayEntryType::EXIT:
-    Success =
-        patchFunctionExit(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
+    Success = patchFunctionExit(Enable, FuncId, Sled, Trampolines);
     break;
   case XRayEntryType::TAIL:
-    Success = patchFunctionTailExit(Enable, FuncId, Sled,
-                                    Trampolines.TailExitTrampoline);
+    Success = patchFunctionTailExit(Enable, FuncId, Sled, Trampolines);
     break;
   case XRayEntryType::LOG_ARGS_ENTRY:
-    Success =
-        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.LogArgsTrampoline);
+    Success = patchFunctionEntry(Enable, FuncId, Sled, Trampolines, true);
     break;
   case XRayEntryType::CUSTOM_EVENT:
-    Success = patchCustomEvent(Enable, FuncId, Sled,
-                               Trampolines.CustomEventTrampoline);
+    Success = patchCustomEvent(Enable, FuncId, Sled);
     break;
   case XRayEntryType::TYPED_EVENT:
-    Success =
-        patchTypedEvent(Enable, FuncId, Sled, Trampolines.TypedEventTrampoline);
+    Success = patchTypedEvent(Enable, FuncId, Sled);
     break;
   default:
     Report("Unsupported sled kind '%" PRIu64 "' @%04x\n", Sled.Address,
@@ -236,8 +229,8 @@ XRayPatchingStatus patchFunction(int32_t FuncId, int32_t ObjId,
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    if (ObjId < 0 ||
-        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+    if (ObjId < 0 || static_cast<uint32_t>(ObjId) >=
+                         atomic_load(&XRayNumObjects, memory_order_acquire)) {
       Report("Unable to patch function: invalid sled map index: %d", ObjId);
       return XRayPatchingStatus::FAILED;
     }
@@ -296,8 +289,8 @@ XRayPatchingStatus controlPatchingObjectUnchecked(bool Enable, int32_t ObjId) {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    if (ObjId < 0 ||
-        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+    if (ObjId < 0 || static_cast<uint32_t>(ObjId) >=
+                         atomic_load(&XRayNumObjects, memory_order_acquire)) {
       Report("Unable to patch functions: invalid sled map index: %d\n", ObjId);
       return XRayPatchingStatus::FAILED;
     }
@@ -411,9 +404,7 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
         CombinedStatus = NOT_INITIALIZED;
       break;
     case ONGOING:
-      llvm_unreachable("Status ONGOING should not appear at this point");
-    default:
-      llvm_unreachable("Unhandled patching status");
+      UNREACHABLE("Status ONGOING should not appear at this point");
     }
   }
   return CombinedStatus;
@@ -442,8 +433,8 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, int32_t ObjId,
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    if (ObjId < 0 ||
-        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+    if (ObjId < 0 || static_cast<uint32_t>(ObjId) >=
+                         atomic_load(&XRayNumObjects, memory_order_acquire)) {
       Report("Unable to patch function: invalid sled map index: %d\n", ObjId);
       return XRayPatchingStatus::FAILED;
     }
@@ -637,7 +628,7 @@ uintptr_t __xray_function_address_in_object(int32_t FuncId, int32_t ObjId)
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
     auto count = atomic_load(&XRayNumObjects, memory_order_acquire);
-    if (ObjId < 0 || ObjId >= count) {
+    if (ObjId < 0 || static_cast<uint32_t>(ObjId) >= count) {
       Report("Unable to determine function address: invalid sled map index %d "
              "(size is %d)\n",
              ObjId, (int)count);
@@ -667,7 +658,8 @@ size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
 
 size_t __xray_max_function_id_in_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Guard(&XRayInstrMapMutex);
-  if (ObjId < 0 || ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire))
+  if (ObjId < 0 || static_cast<uint32_t>(ObjId) >=
+                       atomic_load(&XRayNumObjects, memory_order_acquire))
     return 0;
   return XRayInstrMaps[ObjId].Functions;
 }
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index 5fbaa9c3f315b1..67d78f48127424 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -85,8 +85,6 @@ struct XRayTrampolines {
   void (*ExitTrampoline)();
   void (*TailExitTrampoline)();
   void (*LogArgsTrampoline)();
-  void (*CustomEventTrampoline)();
-  void (*TypedEventTrampoline)();
 
   XRayTrampolines() {
     // These resolve to the definitions in the respective executable or DSO.
@@ -94,8 +92,6 @@ struct XRayTrampolines {
     ExitTrampoline = __xray_FunctionExit;
     TailExitTrampoline = __xray_FunctionTailExit;
     LogArgsTrampoline = __xray_ArgLoggerEntry;
-    CustomEventTrampoline = __xray_CustomEvent;
-    TypedEventTrampoline = __xray_TypedEvent;
   }
 };
 
@@ -140,15 +136,14 @@ struct XRaySledMap {
 };
 
 bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
-                        void (*Trampoline)());
+                        const XRayTrampolines &Trampolines, bool logArgs);
 bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
-                       void (*Trampoline)());
+                       const XRayTrampolines &Trampolines);
 bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
-                           const XRaySledEntry &Sled, void (*Trampoline)());
-bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
-                      void (*Trampoline)());
-bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
-                     void (*Trampoline)());
+                           const XRaySledEntry &Sled,
+                           const XRayTrampolines &Trampolines);
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
 
 } // namespace __xray
 
diff --git a/compiler-rt/lib/xray/xray_loongarch64.cpp b/compiler-rt/lib/xray/xray_loongarch64.cpp
index b839adba00d225..b72417d4f2aadb 100644
--- a/compiler-rt/lib/xray/xray_loongarch64.cpp
+++ b/compiler-rt/lib/xray/xray_loongarch64.cpp
@@ -126,20 +126,25 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId,
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                         const XRaySledEntry &Sled,
-                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                        const XRayTrampolines &Trampolines,
+                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
+  auto Trampoline =
+      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
   return patchSled(Enable, FuncId, Sled, Trampoline);
 }
 
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+bool patchFunctionExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+bool patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   // TODO: In the future we'd need to distinguish between non-tail exits and
   // tail exits for better information preservation.
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
@@ -158,3 +163,7 @@ bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
 extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
   // TODO: This will have to be implemented in the trampoline assembly file.
 }
+
+extern "C" void __xray_FunctionTailExit() XRAY_NEVER_INSTRUMENT {
+  // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/compiler-rt/lib/xray/xray_mips.cpp b/compiler-rt/lib/xray/xray_mips.cpp
index dc9e837a555dd7..4d2a7c82bddb9d 100644
--- a/compiler-rt/lib/xray/xray_mips.cpp
+++ b/compiler-rt/lib/xray/xray_mips.cpp
@@ -136,20 +136,25 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                         const XRaySledEntry &Sled,
-                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                        const XRayTrampolines &Trampolines,
+                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
+  auto Trampoline =
+      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
   return patchSled(Enable, FuncId, Sled, Trampoline);
 }
 
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+bool patchFunctionExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+bool patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   // FIXME: In the future we'd need to distinguish between non-tail exits and
   // tail exits for better information preservation.
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
@@ -169,3 +174,7 @@ bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
 extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
   // FIXME: this will have to be implemented in the trampoline assembly file
 }
+
+extern "C" void __xray_FunctionTailExit() XRAY_NEVER_INSTRUMENT {
+  // FIXME: this will have to be implemented in the trampoline assembly file
+}
\ No newline at end of file
diff --git a/compiler-rt/lib/xray/xray_mips64.cpp b/compiler-rt/lib/xray/xray_mips64.cpp
index 5b221bb6ddc05d..a088a322cc1568 100644
--- a/compiler-rt/lib/xray/xray_mips64.cpp
+++ b/compiler-rt/lib/xray/xray_mips64.cpp
@@ -144,20 +144,25 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                         const XRaySledEntry &Sled,
-                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                        const XRayTrampolines &Trampolines,
+                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
+  auto Trampoline =
+      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
   return patchSled(Enable, FuncId, Sled, Trampoline);
 }
 
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+bool patchFunctionExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+bool patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   // FIXME: In the future we'd need to distinguish between non-tail exits and
   // tail exits for better information preservation.
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+  return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
@@ -176,3 +181,7 @@ bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
 extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
   // FIXME: this will have to be implemented in the trampoline assembly file
 }
+
+extern "C" void __xray_FunctionTailExit() XRAY_NEVER_INSTRUMENT {
+  // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/compiler-rt/lib/xray/xray_powerpc64.cpp b/compiler-rt/lib/xray/xray_powerpc64.cpp
index c3553d848313f9..88cb5fbedb2100 100644
--- a/compiler-rt/lib/xray/xray_powerpc64.cpp
+++ b/compiler-rt/lib/xray/xray_powerpc64.cpp
@@ -51,7 +51,12 @@ namespace __xray {
 
 bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
                         const XRaySledEntry &Sled,
-                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                        const XRayTrampolines &Trampolines,
+                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
+  // TODO: Trampoline addresses are currently inserted at compile-time, using
+  //       __xray_FunctionEntry and __xray_FunctionExit only.
+  //       To support DSO instrumentation, trampolines have to be written during
+  //       patching (see implementation on X86_64, e.g.).
   const uint64_t Address = Sled.address();
   if (Enable) {
     // lis 0, FuncId[16..32]
@@ -68,8 +73,13 @@ bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
   return true;
 }
 
-bool patchFunctionExit(const bool Enable, uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+bool patchFunctionExit(
+    const bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  // TODO: Trampoline addresses are currently inserted at compile-time, using
+  //       __xray_FunctionEntry and __xray_FunctionExit only.
+  //       To support DSO instrumentation, trampolines have to be written during
+  //       patching (see implementation on X86_64, e.g.).
   const uint64_t Address = Sled.address();
   if (Enable) {
     // lis 0, FuncId[16..32]
@@ -87,8 +97,9 @@ bool patchFunctionExit(const bool Enable, uint32_t FuncId,
 }
 
 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchFunctionExit(Enable, FuncId, Sled);
+                           const XRaySledEntry &Sled,
+                           const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchFunctionExit(Enable, FuncId, Sled, Trampolines);
 }
 
 // FIXME: Maybe implement this better?
@@ -111,3 +122,10 @@ bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
 extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
   // FIXME: this will have to be implemented in the trampoline assembly file
 }
+
+extern "C" void __xray_FunctionTailExit() XRAY_NEVER_INSTRUMENT {
+  // For PowerPC, calls to __xray_FunctionEntry and __xray_FunctionExit
+  // are statically inserted into the sled. Tail exits are handled like normal
+  // function exits. This trampoline is therefore not implemented.
+  // This stub is placed here to avoid linking issues.
+}
diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp
index 663a51b2686614..8542351ca26893 100644
--- a/compiler-rt/lib/xray/xray_x86_64.cpp
+++ b/compiler-rt/lib/xray/xray_x86_64.cpp
@@ -122,7 +122,8 @@ static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                         const XRaySledEntry &Sled,
-                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                        const XRayTrampolines &Trampolines,
+                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -144,6 +145,8 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
   // opcode and first operand.
   //
   // Prerequisite is to compute the relative offset to the trampoline's address.
+  auto Trampoline =
+      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
   const uint64_t Address = Sled.address();
   int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                              (static_cast<int64_t>(Address) + 11);
@@ -169,9 +172,9 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
   return true;
 }
 
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled,
-                       void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+bool patchFunctionExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -192,6 +195,7 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
   //
   // Prerequisite is to compute the relative offset fo the
   // __xray_FunctionExit function's address.
+  auto Trampoline = Trampolines.ExitTrampoline;
   const uint64_t Address = Sled.address();
   int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                              (static_cast<int64_t>(Address) + 11);
@@ -217,11 +221,12 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
   return true;
 }
 
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled,
-                           void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+bool patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the tail call sled with a similar
   // sequence as the entry sled, but calls the tail exit sled instead.
+  auto Trampoline = Trampolines.TailExitTrampoline;
   const uint64_t Address = Sled.address();
   int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                              (static_cast<int64_t>(Address) + 11);
@@ -248,8 +253,7 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled,
-                      void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -277,8 +281,7 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
-                     const XRaySledEntry &Sled,
-                     void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:

>From dc24342dd6af51ee1de964e5ff4603cf70d3880f Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <sebastian.kreutzer at tu-darmstadt.de>
Date: Fri, 18 Oct 2024 18:46:44 +0200
Subject: [PATCH 3/3] fixup! [XRay] Add DSO support for XRay instrumentation on
 X86_64

---
 compiler-rt/lib/xray/xray_powerpc64.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/xray/xray_powerpc64.cpp b/compiler-rt/lib/xray/xray_powerpc64.cpp
index 88cb5fbedb2100..a568c568c41860 100644
--- a/compiler-rt/lib/xray/xray_powerpc64.cpp
+++ b/compiler-rt/lib/xray/xray_powerpc64.cpp
@@ -96,9 +96,9 @@ bool patchFunctionExit(
   return true;
 }
 
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled,
-                           const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+bool patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   return patchFunctionExit(Enable, FuncId, Sled, Trampolines);
 }
 



More information about the cfe-commits mailing list