[clang] [compiler-rt] [XRay] Add support for instrumentation of DSOs on x86_64 (PR #90959)

Sebastian Kreutzer via llvm-commits llvm-commits at lists.llvm.org
Fri May 3 04:15:52 PDT 2024


https://github.com/sebastiankreutzer created https://github.com/llvm/llvm-project/pull/90959

This PR introduces shared library (DSO) support for XRay based on a revised version of the implementation outlined in [this RFC](https://discourse.llvm.org/t/rfc-upstreaming-dso-instrumentation-support-for-xray/73000).
The feature enables the patching and handling of events from DSOs, supporting both libraries linked at startup and libraries loaded explicitly, e.g. via `dlopen`.
This patch adds the following:
- The `-fxray-enable-shared` flag to enable the feature (turned off by default)
- A small runtime library that is linked into every instrumented DSO, providing position-independent trampolines and code to register with the main XRay runtime
- Changes to the XRay runtime to support management and patching of multiple objects

These changes are fully backward compatible, i.e. running without instrumented DSOs will produce identical traces (in terms of recorded function IDs) to the previous implementation.

Due to my limited ability to test on other architectures, this feature is only implemented and tested with x86_64. Extending support to other architectures is fairly straightforward, requiring only a position-independent version of the architecture-specific trampoline code (see `compiler-rt/lib/xray/xray_trampoline_x86_64.S` for reference).

This patch does not include any functionality to resolve function IDs from DSOs for the provided logging/tracing modes. These modes still work and will record calls from DSOs, but symbol resolution for these functions is not available. Getting this to work properly requires recording information about the loaded DSOs and should IMO be discussed in a separate RFC, as there are multiple feasible approaches.

@petrhosek @jplehr 

>From 1f0484b73ad0bb9b40e3cd86d8abad4af14e32dc Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Thu, 26 Oct 2023 15:13:05 +0200
Subject: [PATCH] [XRay] Add DSO support for XRay instrumentation on X86_64

---
 clang/include/clang/Driver/Options.td         |   4 +
 clang/include/clang/Driver/XRayArgs.h         |   4 +
 clang/lib/Driver/ToolChains/CommonArgs.cpp    |  12 +-
 clang/lib/Driver/XRayArgs.cpp                 |   7 +
 .../cmake/Modules/AllSupportedArchDefs.cmake  |   1 +
 compiler-rt/cmake/config-ix.cmake             |   4 +
 compiler-rt/include/xray/xray_interface.h     |  23 ++
 compiler-rt/lib/xray/CMakeLists.txt           |  73 ++++-
 compiler-rt/lib/xray/xray_dso_init.cpp        |  62 +++++
 compiler-rt/lib/xray/xray_init.cpp            | 158 +++++++++--
 compiler-rt/lib/xray/xray_interface.cpp       | 261 ++++++++++++++----
 .../lib/xray/xray_interface_internal.h        |  83 +++++-
 compiler-rt/lib/xray/xray_trampoline_x86_64.S |  24 +-
 compiler-rt/lib/xray/xray_x86_64.cpp          |  23 +-
 .../xray/TestCases/Posix/basic-mode-dso.cpp   |  47 ++++
 .../TestCases/Posix/clang-enable-shared.cpp   |  14 +
 .../test/xray/TestCases/Posix/dlopen.cpp      | 110 ++++++++
 .../TestCases/Posix/patch-premain-dso.cpp     |  45 +++
 .../Posix/patching-unpatching-dso.cpp         |  75 +++++
 19 files changed, 912 insertions(+), 118 deletions(-)
 create mode 100644 compiler-rt/lib/xray/xray_dso_init.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 953f6fc649e621..3e3be5475c0c4d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2850,6 +2850,10 @@ def fxray_selected_function_group :
   HelpText<"When using -fxray-function-groups, select which group of functions to instrument. Valid range is 0 to fxray-function-groups - 1">,
   MarshallingInfoInt<CodeGenOpts<"XRaySelectedFunctionGroup">, "0">;
 
+def fxray_enable_shared : Flag<["-"], "fxray-enable-shared">, Group<f_Group>,  Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Enable shared library instrumentation.">;
+def fno_xray_enable_shared : Flag<["-"], "fno-xray-enable-shared">, Group<f_Group>,
+  Visibility<[ClangOption, CC1Option]>;
 
 defm fine_grained_bitfield_accesses : BoolOption<"f", "fine-grained-bitfield-accesses",
   CodeGenOpts<"FineGrainedBitfieldAccesses">, DefaultFalse,
diff --git a/clang/include/clang/Driver/XRayArgs.h b/clang/include/clang/Driver/XRayArgs.h
index bdd3d979547eed..90a21e69586033 100644
--- a/clang/include/clang/Driver/XRayArgs.h
+++ b/clang/include/clang/Driver/XRayArgs.h
@@ -27,6 +27,7 @@ class XRayArgs {
   XRayInstrSet InstrumentationBundle;
   llvm::opt::Arg *XRayInstrument = nullptr;
   bool XRayRT = true;
+  bool XRayEnableShared = false;
 
 public:
   /// Parses the XRay arguments from an argument list.
@@ -35,6 +36,9 @@ class XRayArgs {
                llvm::opt::ArgStringList &CmdArgs, types::ID InputType) const;
 
   bool needsXRayRt() const { return XRayInstrument && XRayRT; }
+  bool needsXRayDSORt() const {
+    return XRayInstrument && XRayRT && XRayEnableShared;
+  }
   llvm::ArrayRef<std::string> modeList() const { return Modes; }
   XRayInstrSet instrumentationBundle() const { return InstrumentationBundle; }
 };
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 6796b43a155020..399bf795ce394e 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1520,10 +1520,14 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
 }
 
 bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
-  if (Args.hasArg(options::OPT_shared))
-    return false;
-
-  if (TC.getXRayArgs().needsXRayRt()) {
+  if (Args.hasArg(options::OPT_shared)) {
+    if (TC.getXRayArgs().needsXRayDSORt()) {
+      CmdArgs.push_back("-whole-archive");
+      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray-dso"));
+      CmdArgs.push_back("-no-whole-archive");
+      return true;
+    }
+  } else if (TC.getXRayArgs().needsXRayRt()) {
     CmdArgs.push_back("--whole-archive");
     CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray"));
     for (const auto &Mode : TC.getXRayArgs().modeList())
diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp
index 8c5134e2501358..7809cd7ef7c759 100644
--- a/clang/lib/Driver/XRayArgs.cpp
+++ b/clang/lib/Driver/XRayArgs.cpp
@@ -63,6 +63,10 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
         << XRayInstrument->getSpelling() << Triple.str();
   }
 
+  if (Args.hasFlag(options::OPT_fxray_enable_shared,
+                   options::OPT_fno_xray_enable_shared, false))
+    XRayEnableShared = true;
+
   // Both XRay and -fpatchable-function-entry use
   // TargetOpcode::PATCHABLE_FUNCTION_ENTER.
   if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ))
@@ -177,6 +181,9 @@ void XRayArgs::addArgs(const ToolChain &TC, const ArgList &Args,
   Args.addOptOutFlag(CmdArgs, options::OPT_fxray_function_index,
                      options::OPT_fno_xray_function_index);
 
+  if (XRayEnableShared)
+    CmdArgs.push_back("-fxray-enable-shared");
+
   if (const Arg *A =
           Args.getLastArg(options::OPT_fxray_instruction_threshold_EQ)) {
     int Value;
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index 2fe06273a814c7..ea87edceb1b4a6 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -84,6 +84,7 @@ else()
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
 		powerpc64le ${HEXAGON} ${LOONGARCH64})
 endif()
+set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64})
 set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
 
 if (UNIX)
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index ba740af9e1d60f..1b20015576532c 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -656,6 +656,9 @@ if(APPLE)
   list_intersect(XRAY_SUPPORTED_ARCH
     ALL_XRAY_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(XRAY_DSO_SUPPORTED_ARCH
+    ALL_XRAY_DSO_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
   list_intersect(SHADOWCALLSTACK_SUPPORTED_ARCH
     ALL_SHADOWCALLSTACK_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
@@ -689,6 +692,7 @@ else()
   filter_available_targets(CFI_SUPPORTED_ARCH ${ALL_CFI_SUPPORTED_ARCH})
   filter_available_targets(SCUDO_STANDALONE_SUPPORTED_ARCH ${ALL_SCUDO_STANDALONE_SUPPORTED_ARCH})
   filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
+  filter_available_targets(XRAY_DSO_SUPPORTED_ARCH ${ALL_XRAY_DSO_SUPPORTED_ARCH})
   filter_available_targets(SHADOWCALLSTACK_SUPPORTED_ARCH
     ${ALL_SHADOWCALLSTACK_SUPPORTED_ARCH})
   filter_available_targets(GWP_ASAN_SUPPORTED_ARCH ${ALL_GWP_ASAN_SUPPORTED_ARCH})
diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
index 727431c04e4f73..19c0f6b23175d0 100644
--- a/compiler-rt/include/xray/xray_interface.h
+++ b/compiler-rt/include/xray/xray_interface.h
@@ -97,27 +97,50 @@ enum XRayPatchingStatus {
 /// for possible result values.
 extern XRayPatchingStatus __xray_patch();
 
+extern XRayPatchingStatus __xray_patch_object(int32_t ObjId);
+
 /// Reverses the effect of __xray_patch(). See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_unpatch();
 
+extern XRayPatchingStatus __xray_unpatch_object(int32_t ObjId);
+
 /// This patches a specific function id. See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_patch_function(int32_t FuncId);
 
+extern XRayPatchingStatus __xray_patch_function_in_object(int32_t FuncId,
+                                                          int32_t ObjId);
+
 /// This unpatches a specific function id. See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_unpatch_function(int32_t FuncId);
 
+extern XRayPatchingStatus __xray_unpatch_function_in_object(int32_t FuncId,
+                                                            int32_t ObjId);
+
 /// This function returns the address of the function provided a valid function
 /// id. We return 0 if we encounter any error, even if 0 may be a valid function
 /// address.
 extern uintptr_t __xray_function_address(int32_t FuncId);
 
+extern uintptr_t __xray_function_address_in_object(int32_t FuncId,
+                                                   int32_t ObjId);
+
 /// This function returns the maximum valid function id. Returns 0 if we
 /// encounter errors (when there are no instrumented functions, etc.).
 extern size_t __xray_max_function_id();
 
+extern size_t __xray_max_function_id_in_object(int32_t ObjId);
+
+extern size_t __xray_num_objects();
+
+extern int32_t __xray_unpack_function_id(int32_t PackedId);
+
+extern int32_t __xray_unpack_object_id(int32_t PackedId);
+
+extern int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId);
+
 /// Initialize the required XRay data structures. This is useful in cases where
 /// users want to control precisely when the XRay instrumentation data
 /// structures are initialized, for example when the XRay library is built with
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index cf7b5062aae32d..165e61b6f45762 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -10,6 +10,10 @@ set(XRAY_SOURCES
   xray_utils.cpp
   )
 
+set(XRAY_DSO_SOURCES
+  xray_dso_init.cpp
+  )
+
 # Implementation files for all XRay modes.
 set(XRAY_FDR_MODE_SOURCES
   xray_fdr_flags.cpp
@@ -33,6 +37,11 @@ set(x86_64_SOURCES
   xray_trampoline_x86_64.S
   )
 
+set(x86_64_DSO_SOURCES
+   xray_trampoline_x86_64.S
+   )
+
+
 set(arm_SOURCES
   xray_arm.cpp
   xray_trampoline_arm.S
@@ -128,10 +137,12 @@ set(XRAY_IMPL_HEADERS
 # consumption by tests.
 set(XRAY_ALL_SOURCE_FILES
   ${XRAY_SOURCES}
+  ${XRAY_DSO_SOURCES}
   ${XRAY_FDR_MODE_SOURCES}
   ${XRAY_BASIC_MODE_SOURCES}
   ${XRAY_PROFILING_MODE_SOURCES}
   ${x86_64_SOURCES}
+  ${x86_64_DSO_SOURCES}
   ${arm_SOURCES}
   ${armhf_SOURCES}
   ${hexagon_SOURCES}
@@ -162,6 +173,9 @@ set(XRAY_CFLAGS
   ${COMPILER_RT_CXX_CFLAGS})
 set(XRAY_COMMON_DEFINITIONS SANITIZER_COMMON_NO_REDEFINE_BUILTINS XRAY_HAS_EXCEPTIONS=1)
 
+# DSO trampolines need to be compiled with GOT addressing
+set(XRAY_COMMON_DEFINITIONS_DSO ${XRAY_COMMON_DEFINITIONS} XRAY_PIC)
+
 # Too many existing bugs, needs cleanup.
 append_list_if(COMPILER_RT_HAS_WNO_FORMAT -Wno-format XRAY_CFLAGS)
 
@@ -201,7 +215,16 @@ if (APPLE)
     CFLAGS ${XRAY_CFLAGS}
     DEFS ${XRAY_COMMON_DEFINITIONS}
     DEPS ${XRAY_DEPS})
+  add_compiler_rt_object_libraries(RTXrayDSO
+    OS ${XRAY_SUPPORTED_OS}
+    ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+    SOURCES ${XRAY_DSO_SOURCES}
+    ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+    CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+    DEPS ${XRAY_DEPS})
   set(XRAY_RTXRAY_ARCH_LIBS "")
+  set(XRAY_DSO_RTXRAY_ARCH_LIBS "")
   foreach(arch ${XRAY_SUPPORTED_ARCH})
     if(NOT ${arch} IN_LIST XRAY_SOURCE_ARCHS)
       continue()
@@ -215,6 +238,17 @@ if (APPLE)
       DEFS ${XRAY_COMMON_DEFINITIONS}
       DEPS ${XRAY_DEPS})
     list(APPEND XRAY_RTXRAY_ARCH_LIBS RTXray_${arch})
+    if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
+      add_compiler_rt_object_libraries(RTXrayDSO_${arch}
+        OS ${XRAY_SUPPORTED_OS}
+        ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+        SOURCES ${${arch}_DSO_SOURCES}
+        ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+        CFLAGS ${XRAY_CFLAGS}
+        DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+        DEPS ${XRAY_DEPS})
+      list(APPEND XRAY_DSO_RTXRAY_ARCH_LIBS RTXrayDSO_${arch})
+    endif()
   endforeach()
   add_compiler_rt_object_libraries(RTXrayFDR
     OS ${XRAY_SUPPORTED_OS}
@@ -252,6 +286,17 @@ if (APPLE)
     LINK_FLAGS ${XRAY_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
     LINK_LIBS ${XRAY_LINK_LIBS}
     PARENT_TARGET xray)
+  add_compiler_rt_runtime(clang_rt.xray-dso
+    STATIC
+    OS ${XRAY_SUPPORTED_OS}
+    ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+    OBJECT_LIBS RTXrayDSO ${XRAY_DSO_RTXRAY_ARCH_LIBS}
+    CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS}
+    LINK_FLAGS ${XRAY_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+    LINK_LIBS ${XRAY_LINK_LIBS}
+    PARENT_TARGET xray)
+
   add_compiler_rt_runtime(clang_rt.xray-fdr
     STATIC
     OS ${XRAY_SUPPORTED_OS}
@@ -283,6 +328,7 @@ if (APPLE)
     LINK_LIBS ${XRAY_LINK_LIBS}
     PARENT_TARGET xray)
 else() # not Apple
+  message("DSO Supported Archs: ${XRAY_DSO_SUPPORTED_ARCH}")
   foreach(arch ${XRAY_SUPPORTED_ARCH})
     if(NOT CAN_TARGET_${arch})
       continue()
@@ -325,7 +371,7 @@ else() # not Apple
      LINK_LIBS ${XRAY_LINK_LIBS}
      DEFS ${XRAY_COMMON_DEFINITIONS}
      OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS} RTXray
-     PARENT_TARGET xray)
+     PARENT_TARGET xray)  
     # FDR mode runtime archive (addon for clang_rt.xray)
     add_compiler_rt_runtime(clang_rt.xray-fdr
       STATIC
@@ -346,8 +392,8 @@ else() # not Apple
       DEFS ${XRAY_COMMON_DEFINITIONS}
       OBJECT_LIBS RTXrayBASIC
       PARENT_TARGET xray)
-   # Profiler Mode runtime
-   add_compiler_rt_runtime(clang_rt.xray-profiling
+    # Profiler Mode runtime
+    add_compiler_rt_runtime(clang_rt.xray-profiling
      STATIC
      ARCHS ${arch}
      CFLAGS ${XRAY_CFLAGS}
@@ -356,6 +402,27 @@ else() # not Apple
      DEFS ${XRAY_COMMON_DEFINITIONS}
      OBJECT_LIBS RTXrayPROFILING
      PARENT_TARGET xray)
+
+    if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
+      # TODO: Only implemented for X86 at the moment
+      add_compiler_rt_object_libraries(RTXrayDSO
+        ARCHS ${arch}
+        SOURCES ${XRAY_DSO_SOURCES} ${${arch}_DSO_SOURCES} 
+        ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+        CFLAGS ${XRAY_CFLAGS}
+        DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+        DEPS ${XRAY_DEPS})
+      # DSO runtime archive
+      add_compiler_rt_runtime(clang_rt.xray-dso
+        STATIC
+        ARCHS ${arch}
+        CFLAGS ${XRAY_CFLAGS}
+        LINK_FLAGS ${XRAY_LINK_FLAGS}
+        LINK_LIBS ${XRAY_LINK_LIBS}
+        DEFS ${XRAY_COMMON_DEFINITIONS}
+        OBJECT_LIBS RTXrayDSO
+        PARENT_TARGET xray)
+    endif()
   endforeach()
 endif() # not Apple
 
diff --git a/compiler-rt/lib/xray/xray_dso_init.cpp b/compiler-rt/lib/xray/xray_dso_init.cpp
new file mode 100644
index 00000000000000..ad5f91f3f2448f
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_dso_init.cpp
@@ -0,0 +1,62 @@
+//===-- xray_dso_init.cpp ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic for DSOs.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+using namespace __sanitizer;
+
+extern "C" {
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+
+#if SANITIZER_MAC
+// HACK: This is a temporary workaround to make XRay build on
+// Darwin, but it will probably not work at runtime.
+extern const XRaySledEntry __start_xray_instr_map[] = {};
+extern const XRaySledEntry __stop_xray_instr_map[] = {};
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {};
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {};
+#endif
+}
+
+// Handler functions to call in the patched entry/exit sled.
+extern atomic_uintptr_t XRayPatchedFunction;
+extern atomic_uintptr_t XRayArgLogger;
+extern atomic_uintptr_t XRayPatchedCustomEvent;
+extern atomic_uintptr_t XRayPatchedTypedEvent;
+
+static int __xray_object_id{-1};
+
+// Note: .preinit_array initialization does not work for DSOs
+__attribute__((constructor(0))) static void
+__xray_init_dso() XRAY_NEVER_INSTRUMENT {
+  // Register sleds in main XRay runtime.
+  __xray_object_id =
+      __xray_register_dso(__start_xray_instr_map, __stop_xray_instr_map,
+                          __start_xray_fn_idx, __stop_xray_fn_idx, {});
+}
+
+__attribute__((destructor(0))) static void
+__xray_finalize_dso() XRAY_NEVER_INSTRUMENT {
+  // Inform the main runtime that this DSO is no longer used.
+  __xray_deregister_dso(__xray_object_id);
+}
diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index f22a31b95686d0..c3570d97701eee 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -16,6 +16,8 @@
 #include <unistd.h>
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray_allocator.h"
 #include "xray_defs.h"
 #include "xray_flags.h"
 #include "xray_interface_internal.h"
@@ -28,7 +30,7 @@ extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
 extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
 
 #if SANITIZER_APPLE
-// HACK: This is a temporary workaround to make XRay build on 
+// HACK: This is a temporary workaround to make XRay build on
 // Darwin, but it will probably not work at runtime.
 const XRaySledEntry __start_xray_instr_map[] = {};
 extern const XRaySledEntry __stop_xray_instr_map[] = {};
@@ -50,7 +52,11 @@ atomic_uint8_t XRayInitialized{0};
 
 // This should always be updated before XRayInitialized is updated.
 SpinMutex XRayInstrMapMutex;
-XRaySledMap XRayInstrMap;
+// XRaySledMap XRayInstrMap;
+//  Contains maps for the main executable as well as DSOs.
+// std::vector<XRaySledMap> XRayInstrMaps;
+XRaySledMap *XRayInstrMaps;
+atomic_uint32_t XRayNumObjects;
 
 // Global flag to determine whether the flags have been initialized.
 atomic_uint8_t XRayFlagsInitialized{0};
@@ -58,6 +64,60 @@ atomic_uint8_t XRayFlagsInitialized{0};
 // A mutex to allow only one thread to initialize the XRay data structures.
 SpinMutex XRayInitMutex;
 
+int32_t
+__xray_register_sleds(const XRaySledEntry *SledsBegin,
+                      const XRaySledEntry *SledsEnd,
+                      const XRayFunctionSledIndex *FnIndexBegin,
+                      const XRayFunctionSledIndex *FnIndexEnd, bool FromDSO,
+                      XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
+  if (!SledsBegin || !SledsEnd) {
+    return -1;
+  }
+  XRaySledMap SledMap;
+  SledMap.FromDSO = FromDSO;
+  SledMap.Loaded = true;
+  SledMap.Trampolines = Trampolines;
+  SledMap.Sleds = SledsBegin;
+  SledMap.Entries = SledsEnd - SledsBegin;
+  if (FnIndexBegin != nullptr) {
+    SledMap.SledsIndex = FnIndexBegin;
+    SledMap.Functions = FnIndexEnd - FnIndexBegin;
+  } else {
+    size_t CountFunctions = 0;
+    uint64_t LastFnAddr = 0;
+
+    for (std::size_t I = 0; I < SledMap.Entries; I++) {
+      const auto &Sled = SledMap.Sleds[I];
+      const auto Function = Sled.function();
+      if (Function != LastFnAddr) {
+        CountFunctions++;
+        LastFnAddr = Function;
+      }
+    }
+
+    SledMap.Functions = CountFunctions;
+  }
+  if (SledMap.Functions >= XRayMaxFunctions) {
+    Report("Too many functions! Maximum is %ld\n", XRayMaxFunctions);
+    return -1;
+  }
+
+  if (Verbosity()) {
+    Report("Registering %d new functions!\n", SledMap.Functions);
+  }
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    auto Idx = atomic_fetch_add(&XRayNumObjects, 1, memory_order_acq_rel);
+    if (Idx >= XRayMaxObjects) {
+      Report("Too many objects registered! Maximum is %ld\n", XRayMaxObjects);
+      return -1;
+    }
+    XRayInstrMaps[Idx] = std::move(SledMap);
+    return Idx;
+  }
+}
+
 // __xray_init() will do the actual loading of the current process' memory map
 // and then proceed to look for the .xray_instr_map section/segment.
 void __xray_init() XRAY_NEVER_INSTRUMENT {
@@ -80,29 +140,14 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
     return;
   }
 
-  {
-    SpinMutexLock Guard(&XRayInstrMapMutex);
-    XRayInstrMap.Sleds = __start_xray_instr_map;
-    XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
-    if (__start_xray_fn_idx != nullptr) {
-      XRayInstrMap.SledsIndex = __start_xray_fn_idx;
-      XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx;
-    } else {
-      size_t CountFunctions = 0;
-      uint64_t LastFnAddr = 0;
-
-      for (std::size_t I = 0; I < XRayInstrMap.Entries; I++) {
-        const auto &Sled = XRayInstrMap.Sleds[I];
-        const auto Function = Sled.function();
-        if (Function != LastFnAddr) {
-          CountFunctions++;
-          LastFnAddr = Function;
-        }
-      }
+  atomic_store(&XRayNumObjects, 0, memory_order_release);
+
+  // Pre-allocation takes up approx. 5kB for XRayMaxObjects=64.
+  XRayInstrMaps = allocateBuffer<XRaySledMap>(XRayMaxObjects);
+
+  __xray_register_sleds(__start_xray_instr_map, __stop_xray_instr_map,
+                        __start_xray_fn_idx, __stop_xray_fn_idx, false, {});
 
-      XRayInstrMap.Functions = CountFunctions;
-    }
-  }
   atomic_store(&XRayInitialized, true, memory_order_release);
 
 #ifndef XRAY_NO_PREINIT
@@ -111,6 +156,71 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
 #endif
 }
 
+// Default visibility is hidden, so we have to explicitly make it visible to
+// DSO.
+SANITIZER_INTERFACE_ATTRIBUTE int32_t __xray_register_dso(
+    const XRaySledEntry *SledsBegin, const XRaySledEntry *SledsEnd,
+    const XRayFunctionSledIndex *FnIndexBegin,
+    const XRayFunctionSledIndex *FnIndexEnd,
+    XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
+  // Make sure XRay has been initialized in the main executable.
+  __xray_init();
+
+  if (__xray_num_objects() == 0) {
+    if (Verbosity())
+      Report("No XRay instrumentation map in main executable. Not initializing "
+             "XRay for DSO.\n");
+    return -1;
+  }
+
+  // Register sleds in global map.
+  int ObjId = __xray_register_sleds(SledsBegin, SledsEnd, FnIndexBegin,
+                                     FnIndexEnd, true, Trampolines);
+
+#ifndef XRAY_NO_PREINIT
+  if (ObjId >= 0 && flags()->patch_premain)
+    __xray_patch_object(ObjId);
+#endif
+
+  return ObjId;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE bool
+__xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  // Make sure XRay has been initialized in the main executable.
+  __xray_init();
+
+  if (ObjId <= 0 || ObjId >= __xray_num_objects()) {
+    if (Verbosity())
+      Report("Can't deregister object with ID %d: ID is invalid.\n", ObjId);
+    return false;
+  }
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    auto &Entry = XRayInstrMaps[ObjId];
+    if (!Entry.FromDSO) {
+      if (Verbosity())
+        Report("Can't deregister object with ID %d: object does not correspond "
+               "to a shared library.\n",
+               ObjId);
+      return false;
+    }
+    if (!Entry.Loaded) {
+      if (Verbosity())
+        Report("Can't deregister object with ID %d: object is not loaded.\n",
+               ObjId);
+    }
+    // This is all we have to do here.
+    Entry.Loaded = false;
+  }
+
+  if (Verbosity())
+    Report("Deregistered object with ID %d.\n", ObjId);
+
+  return true;
+}
+
 // FIXME: Make check-xray tests work on FreeBSD without
 // SANITIZER_CAN_USE_PREINIT_ARRAY.
 // See sanitizer_internal_defs.h where the macro is defined.
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 5839043fcb93a8..dcd75c83b0b00e 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -36,7 +36,8 @@
 
 extern __sanitizer::SpinMutex XRayInstrMapMutex;
 extern __sanitizer::atomic_uint8_t XRayInitialized;
-extern __xray::XRaySledMap XRayInstrMap;
+extern __xray::XRaySledMap *XRayInstrMaps;
+extern __sanitizer::atomic_uint32_t XRayNumObjects;
 
 namespace __xray {
 
@@ -61,16 +62,16 @@ static const int16_t cSledLength = 20;
 #endif /* CPU architecture */
 
 // This is the function to call when we encounter the entry or exit sleds.
-atomic_uintptr_t XRayPatchedFunction{0};
+atomic_uintptr_t XRayPatchedFunction SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call from the arg1-enabled sleds/trampolines.
-atomic_uintptr_t XRayArgLogger{0};
+atomic_uintptr_t XRayArgLogger SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call when we encounter a custom event log call.
-atomic_uintptr_t XRayPatchedCustomEvent{0};
+atomic_uintptr_t XRayPatchedCustomEvent SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call when we encounter a typed event log call.
-atomic_uintptr_t XRayPatchedTypedEvent{0};
+atomic_uintptr_t XRayPatchedTypedEvent SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the global status to determine whether we are currently
 // patching/unpatching.
@@ -150,27 +151,42 @@ class MProtectHelper {
 
 namespace {
 
-bool patchSled(const XRaySledEntry &Sled, bool Enable,
-               int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+bool isObjectLoaded(int32_t ObjId) {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (ObjId < 0 ||
+      ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+    return false;
+  }
+  return XRayInstrMaps[ObjId].Loaded;
+}
+
+bool patchSled(const XRaySledEntry &Sled, bool Enable, int32_t FuncId,
+               const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   bool Success = false;
   switch (Sled.Kind) {
   case XRayEntryType::ENTRY:
-    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+    Success =
+        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.EntryTrampoline);
     break;
   case XRayEntryType::EXIT:
-    Success = patchFunctionExit(Enable, FuncId, Sled);
+    Success =
+        patchFunctionExit(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
     break;
   case XRayEntryType::TAIL:
-    Success = patchFunctionTailExit(Enable, FuncId, Sled);
+    Success = patchFunctionTailExit(Enable, FuncId, Sled,
+                                    Trampolines.TailExitTrampoline);
     break;
   case XRayEntryType::LOG_ARGS_ENTRY:
-    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+    Success =
+        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.LogArgsTrampoline);
     break;
   case XRayEntryType::CUSTOM_EVENT:
-    Success = patchCustomEvent(Enable, FuncId, Sled);
+    Success = patchCustomEvent(Enable, FuncId, Sled,
+                               Trampolines.CustomEventTrampoline);
     break;
   case XRayEntryType::TYPED_EVENT:
-    Success = patchTypedEvent(Enable, FuncId, Sled);
+    Success =
+        patchTypedEvent(Enable, FuncId, Sled, Trampolines.TypedEventTrampoline);
     break;
   default:
     Report("Unsupported sled kind '%" PRIu64 "' @%04x\n", Sled.Address,
@@ -205,10 +221,9 @@ findFunctionSleds(int32_t FuncId,
   return Index;
 }
 
-XRayPatchingStatus patchFunction(int32_t FuncId,
+XRayPatchingStatus patchFunction(int32_t FuncId, int32_t ObjId,
                                  bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
 
   uint8_t NotPatching = false;
@@ -220,13 +235,24 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch function: invalid sled map index: %d", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
 
   // If we don't have an index, we can't patch individual functions.
   if (InstrMap.Functions == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Invalid function id provided: %d\n", FuncId);
+    return XRayPatchingStatus::NOT_INITIALIZED;
+  }
+
   // FuncId must be a positive number, less than the number of functions
   // instrumented.
   if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
@@ -234,6 +260,8 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
     return XRayPatchingStatus::FAILED;
   }
 
+  auto PackedId = __xray::MakePackedId(FuncId, ObjId);
+
   // Now we patch ths sleds for this specific function.
   XRayFunctionSledIndex SledRange;
   if (InstrMap.SledsIndex) {
@@ -242,13 +270,13 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
   } else {
     SledRange = findFunctionSleds(FuncId, InstrMap);
   }
+
   auto *f = SledRange.Begin;
   bool SucceedOnce = false;
   for (size_t i = 0; i != SledRange.Size; ++i)
-    SucceedOnce |= patchSled(f[i], Enable, FuncId);
+    SucceedOnce |= patchSled(f[i], Enable, PackedId, InstrMap.Trampolines);
 
-  atomic_store(&XRayPatching, false,
-                            memory_order_release);
+  atomic_store(&XRayPatching, false, memory_order_release);
 
   if (!SucceedOnce) {
     Report("Failed patching any sled for function '%d'.", FuncId);
@@ -261,9 +289,10 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
 // controlPatching implements the common internals of the patching/unpatching
 // implementation. |Enable| defines whether we're enabling or disabling the
 // runtime XRay instrumentation.
-XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+XRayPatchingStatus controlPatching(bool Enable,
+                                   int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
 
   uint8_t NotPatching = false;
@@ -275,18 +304,31 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
   auto XRayPatchingStatusResetter =
       at_scope_exit([&PatchingSuccess] {
         if (!PatchingSuccess)
-          atomic_store(&XRayPatching, false,
-                                    memory_order_release);
+          atomic_store(&XRayPatching, false, memory_order_release);
       });
 
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch functions: invalid sled map index: %d\n", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
   if (InstrMap.Entries == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
+  if (Verbosity())
+    Report("Patching object %d with %d functions.\n", ObjId, InstrMap.Entries);
+
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Object is not loaded at index: %d\n", ObjId);
+    return XRayPatchingStatus::FAILED;
+  }
+
   uint32_t FuncId = 1;
   uint64_t CurFun = 0;
 
@@ -336,20 +378,31 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
       ++FuncId;
       CurFun = F;
     }
-    patchSled(Sled, Enable, FuncId);
+    auto PackedId = __xray::MakePackedId(FuncId, ObjId);
+    patchSled(Sled, Enable, PackedId, InstrMap.Trampolines);
   }
-  atomic_store(&XRayPatching, false,
-                            memory_order_release);
+  atomic_store(&XRayPatching, false, memory_order_release);
   PatchingSuccess = true;
   return XRayPatchingStatus::SUCCESS;
 }
 
-XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
+XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, int32_t ObjId,
                                             bool Enable) XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch function: invalid sled map index: %d\n", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
+  }
+
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Object is not loaded at index: %d\n", ObjId);
+    return XRayPatchingStatus::FAILED;
   }
 
   // FuncId must be a positive number, less than the number of functions
@@ -367,7 +420,7 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
     return XRayPatchingStatus::FAILED;
   }
 
-  // Here we compute the minimum sled and maximum sled associated with a
+  // Here we compute the minimum sled and maximum sled associated with a
   // particular function ID.
   XRayFunctionSledIndex SledRange;
   if (InstrMap.SledsIndex) {
@@ -398,7 +451,7 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
     Report("Failed mprotect: %d\n", errno);
     return XRayPatchingStatus::FAILED;
   }
-  return patchFunction(FuncId, Enable);
+  return patchFunction(FuncId, ObjId, Enable);
 }
 
 } // namespace
@@ -412,12 +465,10 @@ using namespace __xray;
 
 int __xray_set_handler(void (*entry)(int32_t,
                                      XRayEntryType)) XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
 
     atomic_store(&__xray::XRayPatchedFunction,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -425,11 +476,9 @@ int __xray_set_handler(void (*entry)(int32_t,
 
 int __xray_set_customevent_handler(void (*entry)(void *, size_t))
     XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
     atomic_store(&__xray::XRayPatchedCustomEvent,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -437,11 +486,9 @@ int __xray_set_customevent_handler(void (*entry)(void *, size_t))
 
 int __xray_set_typedevent_handler(void (*entry)(size_t, const void *,
                                                 size_t)) XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
     atomic_store(&__xray::XRayPatchedTypedEvent,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -471,42 +518,123 @@ uint16_t __xray_register_event_type(
 }
 
 XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
-  return controlPatching(true);
+  XRayPatchingStatus CombinedStatus{SUCCESS};
+  for (size_t I = 0; I < __xray_num_objects(); ++I) {
+    if (!isObjectLoaded(I))
+      continue;
+    auto LastStatus = controlPatching(true, I);
+    switch (LastStatus) {
+    case FAILED:
+      CombinedStatus = FAILED;
+      break;
+    case NOT_INITIALIZED:
+      if (CombinedStatus != FAILED)
+        CombinedStatus = NOT_INITIALIZED;
+      break;
+    case ONGOING:
+      if (CombinedStatus != FAILED && CombinedStatus != NOT_INITIALIZED)
+        CombinedStatus = ONGOING;
+      break;
+    default:
+      break;
+    }
+  }
+  return CombinedStatus;
+}
+
+XRayPatchingStatus __xray_patch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return controlPatching(true, ObjId);
 }
 
 XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
-  return controlPatching(false);
+  XRayPatchingStatus CombinedStatus{SUCCESS};
+  for (size_t I = 0; I < __xray_num_objects(); ++I) {
+    if (!isObjectLoaded(I))
+      continue;
+    auto LastStatus = controlPatching(false, I);
+    switch (LastStatus) {
+    case FAILED:
+      CombinedStatus = FAILED;
+      break;
+    case NOT_INITIALIZED:
+      if (CombinedStatus != FAILED)
+        CombinedStatus = NOT_INITIALIZED;
+      break;
+    case ONGOING:
+      if (CombinedStatus != FAILED && CombinedStatus != NOT_INITIALIZED)
+        CombinedStatus = ONGOING;
+      break;
+    default:
+      break;
+    }
+  }
+  return CombinedStatus;
+}
+
+XRayPatchingStatus __xray_unpatch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return controlPatching(false, ObjId);
 }
 
 XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
-  return mprotectAndPatchFunction(FuncId, true);
+  auto Ids = __xray::UnpackId(FuncId);
+  auto ObjId = Ids.first;
+  auto FnId = Ids.second;
+  return mprotectAndPatchFunction(FnId, ObjId, true);
+}
+
+XRayPatchingStatus
+__xray_patch_function_in_object(int32_t FuncId,
+                                int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return mprotectAndPatchFunction(FuncId, ObjId, true);
 }
 
 XRayPatchingStatus
 __xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
-  return mprotectAndPatchFunction(FuncId, false);
+  auto Ids = __xray::UnpackId(FuncId);
+  auto ObjId = Ids.first;
+  auto FnId = Ids.second;
+  return mprotectAndPatchFunction(FnId, ObjId, false);
+}
+
+XRayPatchingStatus
+__xray_unpatch_function_in_object(int32_t FuncId,
+                                  int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return mprotectAndPatchFunction(FuncId, ObjId, false);
 }
 
 int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return 0;
 
   // A relaxed write might not be visible even if the current thread gets
   // scheduled on a different CPU/NUMA node.  We need to wait for everyone to
   // have this handler installed for consistency of collected data across CPUs.
   atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry),
-                            memory_order_release);
+               memory_order_release);
   return 1;
 }
 
 int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
 
-uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+// Resolves a packed function id: the id is split into its object and function
+// parts, which are then handed to __xray_function_address_in_object.
+uintptr_t
+__xray_function_address(int32_t CombinedFuncId) XRAY_NEVER_INSTRUMENT {
+  const auto Parts = __xray::UnpackId(CombinedFuncId);
+  const int32_t ObjId = Parts.first;
+  const int32_t FnId = Parts.second;
+  return __xray_function_address_in_object(FnId, ObjId);
+}
+
+uintptr_t __xray_function_address_in_object(int32_t FuncId, int32_t ObjId)
+    XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    auto count = atomic_load(&XRayNumObjects, memory_order_acquire);
+    if (ObjId < 0 || ObjId >= count) {
+      Report("Unable to determine function address: invalid sled map index %d "
+             "(size is %d)\n",
+             ObjId, (int)count);
+      return 0;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
 
   if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions)
@@ -525,6 +653,29 @@ uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
 }
 
 size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
+  return __xray_max_function_id_in_object(0);
+}
+
+// Returns the `Functions` count stored in the sled map registered under
+// |ObjId|, or 0 when |ObjId| does not index a registered object.
+size_t __xray_max_function_id_in_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  // Valid indices are [0, XRayNumObjects). The upper bound must be exclusive:
+  // ObjId == XRayNumObjects would read one slot past the last registered map.
+  if (ObjId < 0 || ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire))
+    return 0;
+  return XRayInstrMaps[ObjId].Functions;
+}
+
+size_t __xray_num_objects() XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Guard(&XRayInstrMapMutex);
-  return XRayInstrMap.Functions;
+  return atomic_load(&XRayNumObjects, memory_order_acquire);
+}
+
+// Returns the function-id component of |PackedId|, i.e. the second element of
+// the pair produced by __xray::UnpackId.
+int32_t __xray_unpack_function_id(int32_t PackedId) {
+  const auto Parts = __xray::UnpackId(PackedId);
+  return Parts.second;
+}
+
+// Returns the object-id component of |PackedId|, i.e. the first element of
+// the pair produced by __xray::UnpackId.
+int32_t __xray_unpack_object_id(int32_t PackedId) {
+  const auto Parts = __xray::UnpackId(PackedId);
+  return Parts.first;
+}
+
+int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId) {
+  return __xray::MakePackedId(FuncId, ObjId);
 }
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index 80c07c167f6461..ec5752a9875e3a 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -18,6 +18,18 @@
 #include "xray/xray_interface.h"
 #include <cstddef>
 #include <cstdint>
+#include <utility>
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+extern void __xray_FunctionTailExit();
+extern void __xray_ArgLoggerEntry();
+extern void __xray_CustomEvent();
+extern void __xray_TypedEvent();
+}
 
 extern "C" {
 
@@ -67,36 +79,77 @@ struct XRayFunctionSledIndex {
                                                    uintptr_t(Begin));
   }
 };
+
+// Set of trampoline entry points, one per kind of sled. An instance is stored
+// in every XRaySledMap and handed to the patch* routines when sleds are
+// patched.
+struct XRayTrampolines {
+  void (*EntryTrampoline)();
+  void (*ExitTrampoline)();
+  void (*TailExitTrampoline)();
+  void (*LogArgsTrampoline)();
+  void (*CustomEventTrampoline)();
+  void (*TypedEventTrampoline)();
+
+  // Binds every slot to the matching __xray_* trampoline symbol. The symbols
+  // resolve to the definitions in the respective executable or DSO.
+  XRayTrampolines()
+      : EntryTrampoline(__xray_FunctionEntry),
+        ExitTrampoline(__xray_FunctionExit),
+        TailExitTrampoline(__xray_FunctionTailExit),
+        LogArgsTrampoline(__xray_ArgLoggerEntry),
+        CustomEventTrampoline(__xray_CustomEvent),
+        TypedEventTrampoline(__xray_TypedEvent) {}
+};
+
+extern int32_t __xray_register_dso(const XRaySledEntry *SledsBegin,
+                                   const XRaySledEntry *SledsEnd,
+                                   const XRayFunctionSledIndex *FnIndexBegin,
+                                   const XRayFunctionSledIndex *FnIndexEnd,
+                                   XRayTrampolines Trampolines);
+
+extern bool __xray_deregister_dso(int32_t ObjId);
 }
 
 namespace __xray {
 
+// Layout of a packed 32-bit function id: the low XRayNFnBits bits carry the
+// per-object function id, the remaining high XRayNObjBits bits the object id.
+constexpr uint32_t XRayNFnBits = 24;
+constexpr uint32_t XRayNObjBits = 8;
+static_assert(XRayNFnBits + XRayNObjBits == 32,
+              "packed ids must occupy exactly 32 bits");
+
+// The masks are derived from the bit widths so the two cannot drift apart.
+// With the widths above they evaluate to 0x00FFFFFF and 0xFF000000.
+constexpr uint32_t XRayFnBitMask = (uint32_t{1} << XRayNFnBits) - 1;
+constexpr uint32_t XRayObjBitMask = ~XRayFnBitMask;
+
+constexpr size_t XRayMaxFunctions = size_t{1} << XRayNFnBits;
+constexpr size_t XRayMaxObjects = size_t{1} << XRayNObjBits;
+
+// Combines an object id and a per-object function id into one 32-bit id. The
+// object id is placed in the bits selected by XRayObjBitMask, the function id
+// in the bits selected by XRayFnBitMask; input bits that do not fit are
+// discarded.
+inline int32_t MakePackedId(int32_t FnId, int32_t ObjId) {
+  // Shift in unsigned arithmetic: left-shifting a negative int32_t, or one
+  // whose set bits are shifted out of range, is undefined before C++20.
+  const uint32_t ObjBits = static_cast<uint32_t>(ObjId) << XRayNFnBits;
+  const uint32_t FnBits = static_cast<uint32_t>(FnId) & XRayFnBitMask;
+  return static_cast<int32_t>((ObjBits & XRayObjBitMask) | FnBits);
+}
+
+// Splits a packed id into its two components. The returned pair is ordered
+// {object id, function id}.
+inline std::pair<int32_t, int32_t> UnpackId(int32_t CombinedId) {
+  const uint32_t Bits = static_cast<uint32_t>(CombinedId);
+  const uint32_t Obj = (Bits & XRayObjBitMask) >> XRayNFnBits;
+  const uint32_t Fn = Bits & XRayFnBitMask;
+  return {static_cast<int32_t>(Obj), static_cast<int32_t>(Fn)};
+}
+
 struct XRaySledMap {
   const XRaySledEntry *Sleds;
   size_t Entries;
   const XRayFunctionSledIndex *SledsIndex;
   size_t Functions;
+  XRayTrampolines Trampolines;
+  bool FromDSO;
+  bool Loaded;
 };
 
 bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
                         void (*Trampoline)());
-bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                       void (*Trampoline)());
 bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
-                           const XRaySledEntry &Sled);
-bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
-bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+                           const XRaySledEntry &Sled, void (*Trampoline)());
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                      void (*Trampoline)());
+bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                     void (*Trampoline)());
 
 } // namespace __xray
 
-extern "C" {
-// The following functions have to be defined in assembler, on a per-platform
-// basis. See xray_trampoline_*.S files for implementations.
-extern void __xray_FunctionEntry();
-extern void __xray_FunctionExit();
-extern void __xray_FunctionTailExit();
-extern void __xray_ArgLoggerEntry();
-extern void __xray_CustomEvent();
-extern void __xray_TypedEvent();
-}
-
 #endif
diff --git a/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
index ff3ac91071a60e..31c0deb34e0cb1 100644
--- a/compiler-rt/lib/xray/xray_trampoline_x86_64.S
+++ b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -108,6 +108,16 @@
 	.section __TEXT,__text
 #endif
 
+// Loads the 8-byte value stored at the symbol named by the `handler` argument
+// into %rax. Without XRAY_PIC the symbol is read directly with a RIP-relative
+// load. With XRAY_PIC the symbol's address is first fetched from its GOT entry
+// and then dereferenced, so %rax ends up holding the same value either way.
+.macro LOAD_HANDLER_ADDR handler
+#if !defined(XRAY_PIC)
+	movq	ASM_SYMBOL(\handler)(%rip), %rax
+#else
+	movq	ASM_SYMBOL(\handler)@GOTPCREL(%rip), %rax
+	movq	(%rax), %rax
+#endif
+.endm
+
+
 //===----------------------------------------------------------------------===//
 
 	.globl ASM_SYMBOL(__xray_FunctionEntry)
@@ -122,7 +132,7 @@ ASM_SYMBOL(__xray_FunctionEntry):
 
 	// This load has to be atomic, it's concurrent with __xray_patch().
 	// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq	%rax, %rax
 	je	LOCAL_LABEL(tmp0)
 
@@ -160,7 +170,7 @@ ASM_SYMBOL(__xray_FunctionExit):
 	movupd	%xmm1, 16(%rsp)
 	movq	%rax, 8(%rsp)
 	movq	%rdx, 0(%rsp)
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq %rax,%rax
 	je	LOCAL_LABEL(tmp2)
 
@@ -196,7 +206,7 @@ ASM_SYMBOL(__xray_FunctionTailExit):
 	ALIGN_STACK_16B
 	SAVE_REGISTERS
 
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq %rax,%rax
 	je	LOCAL_LABEL(tmp4)
 
@@ -225,12 +235,12 @@ ASM_SYMBOL(__xray_ArgLoggerEntry):
 	SAVE_REGISTERS
 
 	// Again, these function pointer loads must be atomic; MOV is fine.
-	movq	ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray13XRayArgLoggerE
 	testq	%rax, %rax
 	jne	LOCAL_LABEL(arg1entryLog)
 
 	// If [arg1 logging handler] not set, defer to no-arg logging.
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq	%rax, %rax
 	je	LOCAL_LABEL(arg1entryFail)
 
@@ -269,7 +279,7 @@ ASM_SYMBOL(__xray_CustomEvent):
 
 	// We take two arguments to this trampoline, which should be in rdi	and rsi
 	// already.
-	movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray22XRayPatchedCustomEventE
 	testq %rax,%rax
 	je LOCAL_LABEL(customEventCleanup)
 
@@ -294,7 +304,7 @@ ASM_SYMBOL(__xray_TypedEvent):
 
 	// We pass three arguments to this trampoline, which should be in rdi, rsi
 	// and rdx without our intervention.
-	movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray21XRayPatchedTypedEventE
 	testq %rax,%rax
 	je LOCAL_LABEL(typedEventCleanup)
 
diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp
index b9666a40861d48..663a51b2686614 100644
--- a/compiler-rt/lib/xray/xray_x86_64.cpp
+++ b/compiler-rt/lib/xray/xray_x86_64.cpp
@@ -170,7 +170,8 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                       const XRaySledEntry &Sled,
+                       void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -192,11 +193,11 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
   // Prerequisite is to compute the relative offset fo the
   // __xray_FunctionExit function's address.
   const uint64_t Address = Sled.address();
-  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                              (static_cast<int64_t>(Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
-           reinterpret_cast<void *>(__xray_FunctionExit),
+           reinterpret_cast<void *>(Trampoline),
            reinterpret_cast<void *>(Address));
     return false;
   }
@@ -217,16 +218,16 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                           const XRaySledEntry &Sled,
+                           void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the tail call sled with a similar
   // sequence as the entry sled, but calls the tail exit sled instead.
   const uint64_t Address = Sled.address();
-  int64_t TrampolineOffset =
-      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
-      (static_cast<int64_t>(Address) + 11);
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
+                             (static_cast<int64_t>(Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
-           reinterpret_cast<void *>(__xray_FunctionTailExit),
+           reinterpret_cast<void *>(Trampoline),
            reinterpret_cast<void *>(Address));
     return false;
   }
@@ -247,7 +248,8 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                      const XRaySledEntry &Sled,
+                      void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -275,7 +277,8 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                     const XRaySledEntry &Sled,
+                     void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
diff --git a/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
new file mode 100644
index 00000000000000..49544cadcae876
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
@@ -0,0 +1,47 @@
+// Testing shared library support in basic logging mode.
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=false,xray_mode=xray-basic,xray_logfile_base=basic-mode-dso-,verbosity=1" XRAY_BASIC_OPTIONS="func_duration_threshold_us=0" %run %t/main.o 2>&1 | FileCheck %s
+// RUN: %llvm_xray account --format=csv --sort=funcid "`ls basic-mode-dso-* | head -1`" | FileCheck --check-prefix=ACCOUNT %s
+// RUN: rm basic-mode-dso-*
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <unistd.h>
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+  sleep(1);
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  // Explicit patching to ensure the DSO has been loaded
+  __xray_patch();
+  instrumented_in_executable();
+  // CHECK: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+#include <unistd.h>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
+
+// ACCOUNT: funcid,count,min,median,90%ile,99%ile,max,sum,debug,function
+// ACCOUNT-NEXT: 1,1,{{.*}}
+// ACCOUNT-NEXT: 16777217,1,{{.*}}
diff --git a/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp b/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
new file mode 100644
index 00000000000000..d3e3b4e5f2904d
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
@@ -0,0 +1,14 @@
+// Test that the DSO-local runtime library has been linked if -fxray-enable-shared is passed.
+//
+// RUN: %clangxx -fxray-instrument -fxray-enable-shared %s -shared -o %t.so
+// RUN: llvm-nm %t.so | FileCheck %s --check-prefix ENABLED
+
+// RUN: %clangxx -fxray-instrument %s -shared -o %t.so
+// RUN: llvm-nm %t.so | FileCheck %s --check-prefix DISABLED
+//
+// REQUIRES: target=x86_64{{.*}}
+
+[[clang::xray_always_instrument]] int always_instrumented() { return 42; }
+
+// ENABLED: __start_xray_instr_map
+// DISABLED-NOT: __start_xray_instr_map
\ No newline at end of file
diff --git a/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
new file mode 100644
index 00000000000000..e7929c018352b4
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
@@ -0,0 +1,110 @@
+// Check that we can patch and un-patch DSOs loaded with dlopen.
+//
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -rdynamic -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=true" %run %t/main.o %t/testlib.so 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <dlfcn.h>
+
+bool called = false;
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+  called = true;
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+typedef void (*dso_func_type)();
+
+int main(int argc, char **argv) {
+  if (argc < 2) {
+    printf("Shared library argument missing\n");
+    // CHECK-NOT: Shared library argument missing
+    return 1;
+  }
+
+  const char *dso_path = argv[1];
+
+  void *dso_handle = dlopen(dso_path, RTLD_LAZY);
+  if (!dso_handle) {
+    printf("Failed to load shared library\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+      return 1;
+    }
+    return 1;
+  }
+
+  dso_func_type instrumented_in_dso =
+      (dso_func_type)dlsym(dso_handle, "_Z19instrumented_in_dsov");
+  if (!instrumented_in_dso) {
+    printf("Failed to find symbol\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+      return 1;
+    }
+    return 1;
+  }
+
+  __xray_set_handler(test_handler);
+
+  instrumented_in_executable();
+  // CHECK: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+
+  auto status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+
+  status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+
+  instrumented_in_executable();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+
+  dlclose(dso_handle);
+
+  status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
diff --git a/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
new file mode 100644
index 00000000000000..9e80fc376795ae
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
@@ -0,0 +1,45 @@
+// Checking that DSOs are automatically patched upon load, if patch_premain is passed.
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=true,verbosity=1" %run %t/main.o 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  __xray_set_handler(test_handler);
+  instrumented_in_executable();
+  // CHECK: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
diff --git a/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp
new file mode 100644
index 00000000000000..900af42c5334c7
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp
@@ -0,0 +1,75 @@
+// Check that we can patch and un-patch on demand, and that logging gets invoked
+// appropriately.
+//
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=false" %run %t/main.o 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+bool called = false; // set by test_handler; not read anywhere in this test
+
+void test_handler(int32_t fid, XRayEntryType type) { // installed in main() via __xray_set_handler
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type)); // line format matched by the FileCheck patterns in main()
+  called = true; // records that the handler ran at least once
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() { // instrumented function that lives in the main binary
+  printf("instrumented_in_executable called\n"); // marker line matched by FileCheck
+}
+
+extern void instrumented_in_dso(); // defined in testlib.cpp, which is linked in as testlib.so
+
+int main() {
+  __xray_set_handler(test_handler); // handler is set, but this test runs with patch_premain=false
+  instrumented_in_executable(); // called before the first __xray_patch()
+  // CHECK: instrumented_in_executable called
+  instrumented_in_dso(); // called before the first __xray_patch()
+  // CHECK: instrumented_in_dso called
+  auto status = __xray_patch(); // patch on demand
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  instrumented_in_executable(); // patched: handler output expected before and after the marker line
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso(); // same expectation for the function in the DSO
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  status = __xray_unpatch(); // undo the patching; the next two calls expect no handler output
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+  status = __xray_patch(); // patch again, this time to exercise the no-handler case
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  __xray_remove_handler(); // patched but no handler installed: only the marker lines are expected
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+  status = __xray_unpatch(); // final unpatch; its status is checked as well
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() { // instrumented function built into testlib.so
+  printf("instrumented_in_dso called\n"); // marker line matched by FileCheck
+}



More information about the llvm-commits mailing list