[clang] a440203 - [XRay] Add support for instrumentation of DSOs on x86_64 (#90959)

via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 11 02:23:38 PDT 2024


Author: Sebastian Kreutzer
Date: 2024-10-11T11:23:34+02:00
New Revision: a4402039bffd788b9af82435fd5a2fb311fdc6e8

URL: https://github.com/llvm/llvm-project/commit/a4402039bffd788b9af82435fd5a2fb311fdc6e8
DIFF: https://github.com/llvm/llvm-project/commit/a4402039bffd788b9af82435fd5a2fb311fdc6e8.diff

LOG: [XRay] Add support for instrumentation of DSOs on x86_64 (#90959)

This PR introduces shared library (DSO) support for XRay based on a
revised version of the implementation outlined in [this
RFC](https://discourse.llvm.org/t/rfc-upstreaming-dso-instrumentation-support-for-xray/73000).
The feature enables the patching and handling of events from DSOs,
supporting both libraries linked at startup or explicitly loaded, e.g.
via `dlopen`.
This patch adds the following:
- The `-fxray-shared` flag to enable the feature (turned off by default)
- A small runtime library that is linked into every instrumented DSO,
providing position-independent trampolines and code to register with the
main XRay runtime
- Changes to the XRay runtime to support management and patching of
multiple objects

These changes are fully backward compatible, i.e. running without
instrumented DSOs will produce identical traces (in terms of recorded
function IDs) to the previous implementation.

Due to my limited ability to test on other architectures, this feature
is only implemented and tested with x86_64. Extending support to other
architectures is fairly straightforward, requiring only a
position-independent implementation of the architecture-specific
trampoline implementation (see
`compiler-rt/lib/xray/xray_trampoline_x86_64.S` for reference).

This patch does not include any functionality to resolve function IDs
from DSOs for the provided logging/tracing modes. These modes still work
and will record calls from DSOs, but symbol resolution for these
functions is not available. Getting this to work properly requires
recording information about the loaded DSOs and should IMO be discussed
in a separate RFC, as there are multiple feasible approaches.

@petrhosek @jplehr

Added: 
    clang/test/Driver/XRay/xray-shared.cpp
    compiler-rt/lib/xray/xray_dso_init.cpp
    compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
    compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
    compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
    compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp
    compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
    compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp

Modified: 
    clang/include/clang/Basic/CodeGenOptions.def
    clang/include/clang/Driver/Options.td
    clang/include/clang/Driver/XRayArgs.h
    clang/lib/Driver/ToolChains/CommonArgs.cpp
    clang/lib/Driver/XRayArgs.cpp
    compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
    compiler-rt/cmake/config-ix.cmake
    compiler-rt/include/xray/xray_interface.h
    compiler-rt/lib/xray/CMakeLists.txt
    compiler-rt/lib/xray/xray_init.cpp
    compiler-rt/lib/xray/xray_interface.cpp
    compiler-rt/lib/xray/xray_interface_internal.h
    compiler-rt/lib/xray/xray_trampoline_x86_64.S
    compiler-rt/lib/xray/xray_x86_64.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index eac831278ee20d..e45370bde74a5d 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -136,6 +136,8 @@ CODEGENOPT(XRayIgnoreLoops , 1, 0)
 ///< Emit the XRay function index section.
 CODEGENOPT(XRayFunctionIndex , 1, 1)
 
+///< Set when -fxray-shared is enabled
+CODEGENOPT(XRayShared , 1, 0)
 
 ///< Set the minimum number of instructions in a function to determine selective
 ///< XRay instrumentation.

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d306c751505e98..4ee16e213d0e13 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2946,6 +2946,11 @@ def fxray_selected_function_group :
   HelpText<"When using -fxray-function-groups, select which group of functions to instrument. Valid range is 0 to fxray-function-groups - 1">,
   MarshallingInfoInt<CodeGenOpts<"XRaySelectedFunctionGroup">, "0">;
 
+defm xray_shared : BoolFOption<"xray-shared",
+  CodeGenOpts<"XRayShared">, DefaultFalse,
+  PosFlag<SetTrue, [], [ClangOption, CC1Option],
+          "Enable shared library instrumentation with XRay">,
+  NegFlag<SetFalse>>;
 
 defm fine_grained_bitfield_accesses : BoolOption<"f", "fine-grained-bitfield-accesses",
   CodeGenOpts<"FineGrainedBitfieldAccesses">, DefaultFalse,

diff  --git a/clang/include/clang/Driver/XRayArgs.h b/clang/include/clang/Driver/XRayArgs.h
index bdd3d979547eed..8fbcf469e5bad1 100644
--- a/clang/include/clang/Driver/XRayArgs.h
+++ b/clang/include/clang/Driver/XRayArgs.h
@@ -27,6 +27,7 @@ class XRayArgs {
   XRayInstrSet InstrumentationBundle;
   llvm::opt::Arg *XRayInstrument = nullptr;
   bool XRayRT = true;
+  bool XRayShared = false;
 
 public:
   /// Parses the XRay arguments from an argument list.
@@ -35,6 +36,9 @@ class XRayArgs {
                llvm::opt::ArgStringList &CmdArgs, types::ID InputType) const;
 
   bool needsXRayRt() const { return XRayInstrument && XRayRT; }
+  bool needsXRayDSORt() const {
+    return XRayInstrument && XRayRT && XRayShared;
+  }
   llvm::ArrayRef<std::string> modeList() const { return Modes; }
   XRayInstrSet instrumentationBundle() const { return InstrumentationBundle; }
 };

diff  --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 0c6a585c3acffd..0a1b7c209563e8 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1613,10 +1613,14 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
 }
 
 bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
-  if (Args.hasArg(options::OPT_shared))
-    return false;
-
-  if (TC.getXRayArgs().needsXRayRt()) {
+  if (Args.hasArg(options::OPT_shared)) {
+    if (TC.getXRayArgs().needsXRayDSORt()) {
+      CmdArgs.push_back("--whole-archive");
+      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray-dso"));
+      CmdArgs.push_back("--no-whole-archive");
+      return true;
+    }
+  } else if (TC.getXRayArgs().needsXRayRt()) {
     CmdArgs.push_back("--whole-archive");
     CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray"));
     for (const auto &Mode : TC.getXRayArgs().modeList())

diff  --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp
index 8c5134e2501358..411054e067cb42 100644
--- a/clang/lib/Driver/XRayArgs.cpp
+++ b/clang/lib/Driver/XRayArgs.cpp
@@ -63,6 +63,23 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
         << XRayInstrument->getSpelling() << Triple.str();
   }
 
+  if (Args.hasFlag(options::OPT_fxray_shared,
+                   options::OPT_fno_xray_shared, false)) {
+    XRayShared = true;
+
+    // DSO instrumentation is currently limited to x86_64
+    if (Triple.getArch() != llvm::Triple::x86_64) {
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << "-fxray-shared" << Triple.str();
+    }
+
+    unsigned PICLvl = std::get<1>(tools::ParsePICArgs(TC, Args));
+    if (!PICLvl) {
+      D.Diag(diag::err_opt_not_valid_without_opt)
+          << "-fxray-shared" << "-fPIC";
+    }
+  }
+
   // Both XRay and -fpatchable-function-entry use
   // TargetOpcode::PATCHABLE_FUNCTION_ENTER.
   if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ))
@@ -177,6 +194,10 @@ void XRayArgs::addArgs(const ToolChain &TC, const ArgList &Args,
   Args.addOptOutFlag(CmdArgs, options::OPT_fxray_function_index,
                      options::OPT_fno_xray_function_index);
 
+  if (XRayShared)
+    Args.addOptInFlag(CmdArgs, options::OPT_fxray_shared,
+                      options::OPT_fno_xray_shared);
+
   if (const Arg *A =
           Args.getLastArg(options::OPT_fxray_instruction_threshold_EQ)) {
     int Value;

diff  --git a/clang/test/Driver/XRay/xray-shared.cpp b/clang/test/Driver/XRay/xray-shared.cpp
new file mode 100644
index 00000000000000..215854e1fc7cef
--- /dev/null
+++ b/clang/test/Driver/XRay/xray-shared.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fpic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-PIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-pic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
+
+// On 64 bit darwin, PIC is always enabled
+// RUN: %clang -### --target=x86_64-apple-darwin -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+
+// Check unsupported targets
+// RUN: not %clang -### --target=aarch64-pc-freebsd -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
+// RUN: not %clang -### --target=arm64-apple-macos -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
+
+// CHECK: "-cc1" {{.*}}"-fxray-instrument" {{.*}}"-fxray-shared"
+// ERR-TARGET:   error: unsupported option '-fxray-shared' for target
+// ERR-PIC:   error: option '-fxray-shared' cannot be specified without '-fPIC'
+

diff  --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index 809e9277156912..50a4256b82fe4e 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -104,6 +104,7 @@ else()
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
 		powerpc64le ${HEXAGON} ${LOONGARCH64})
 endif()
+set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64})
 set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
 
 if (UNIX)

diff  --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index a93a88a9205001..6134c9876b38e9 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -668,6 +668,9 @@ if(APPLE)
   list_intersect(XRAY_SUPPORTED_ARCH
     ALL_XRAY_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(XRAY_DSO_SUPPORTED_ARCH
+    ALL_XRAY_DSO_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
   list_intersect(SHADOWCALLSTACK_SUPPORTED_ARCH
     ALL_SHADOWCALLSTACK_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
@@ -702,6 +705,7 @@ else()
   filter_available_targets(CFI_SUPPORTED_ARCH ${ALL_CFI_SUPPORTED_ARCH})
   filter_available_targets(SCUDO_STANDALONE_SUPPORTED_ARCH ${ALL_SCUDO_STANDALONE_SUPPORTED_ARCH})
   filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
+  filter_available_targets(XRAY_DSO_SUPPORTED_ARCH ${ALL_XRAY_DSO_SUPPORTED_ARCH})
   filter_available_targets(SHADOWCALLSTACK_SUPPORTED_ARCH
     ${ALL_SHADOWCALLSTACK_SUPPORTED_ARCH})
   filter_available_targets(GWP_ASAN_SUPPORTED_ARCH ${ALL_GWP_ASAN_SUPPORTED_ARCH})

diff  --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
index 727431c04e4f73..717cfe292ce416 100644
--- a/compiler-rt/include/xray/xray_interface.h
+++ b/compiler-rt/include/xray/xray_interface.h
@@ -93,31 +93,74 @@ enum XRayPatchingStatus {
   FAILED = 3,
 };
 
-/// This tells XRay to patch the instrumentation points. See XRayPatchingStatus
+/// This tells XRay to patch the instrumentation points in all currently loaded objects. See XRayPatchingStatus
 /// for possible result values.
 extern XRayPatchingStatus __xray_patch();
 
+/// This tells XRay to patch the instrumentation points in the given object.
+/// See XRayPatchingStatus for possible result values.
+extern XRayPatchingStatus __xray_patch_object(int32_t ObjId);
+
 /// Reverses the effect of __xray_patch(). See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_unpatch();
 
-/// This patches a specific function id. See XRayPatchingStatus for possible
+/// Reverses the effect of __xray_patch_object. See XRayPatchingStatus for possible
+/// result values.
+extern XRayPatchingStatus __xray_unpatch_object(int32_t ObjId);
+
+/// This unpacks the given (packed) function id and patches
+/// the corresponding function.  See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_patch_function(int32_t FuncId);
 
-/// This unpatches a specific function id. See XRayPatchingStatus for possible
+/// This patches a specific function in the given object. See XRayPatchingStatus for possible
+/// result values.
+extern XRayPatchingStatus __xray_patch_function_in_object(int32_t FuncId,
+                                                          int32_t ObjId);
+
+/// This unpacks the given (packed) function id and unpatches
+/// the corresponding function. See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_unpatch_function(int32_t FuncId);
 
-/// This function returns the address of the function provided a valid function
-/// id. We return 0 if we encounter any error, even if 0 may be a valid function
+/// This unpatches a specific function in the given object.
+/// See XRayPatchingStatus for possible result values.
+extern XRayPatchingStatus __xray_unpatch_function_in_object(int32_t FuncId,
+                                                            int32_t ObjId);
+
+/// This function unpacks the given (packed) function id and returns the address of the corresponding function. We return 0 if we encounter any error, even if 0 may be a valid function
 /// address.
 extern uintptr_t __xray_function_address(int32_t FuncId);
 
-/// This function returns the maximum valid function id. Returns 0 if we
+/// This function returns the address of the function in the given object provided valid function and object
+/// ids. We return 0 if we encounter any error, even if 0 may be a valid function
+/// address.
+extern uintptr_t __xray_function_address_in_object(int32_t FuncId,
+                                                   int32_t ObjId);
+
+/// This function returns the maximum valid function id for the main executable (object id = 0). Returns 0 if we
 /// encounter errors (when there are no instrumented functions, etc.).
 extern size_t __xray_max_function_id();
 
+/// This function returns the maximum valid function id for the given object. Returns 0 if we
+/// encounter errors (when there are no instrumented functions, etc.).
+extern size_t __xray_max_function_id_in_object(int32_t ObjId);
+
+/// This function returns the number of previously registered objects (executable + loaded DSOs).
+/// Returns 0 if XRay has not been initialized.
+extern size_t __xray_num_objects();
+
+/// Unpacks the function id from the given packed id.
+extern int32_t __xray_unpack_function_id(int32_t PackedId);
+
+/// Unpacks the object id from the given packed id.
+extern int32_t __xray_unpack_object_id(int32_t PackedId);
+
+/// Creates and returns a packed id from the given function and object ids.
+/// If the ids do not fit within the reserved number of bits for each part, the high bits are truncated.
+extern int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId);
+
 /// Initialize the required XRay data structures. This is useful in cases where
 /// users want to control precisely when the XRay instrumentation data
 /// structures are initialized, for example when the XRay library is built with

diff  --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index cf7b5062aae32d..f38c07420c9abf 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -10,6 +10,10 @@ set(XRAY_SOURCES
   xray_utils.cpp
   )
 
+set(XRAY_DSO_SOURCES
+  xray_dso_init.cpp
+  )
+
 # Implementation files for all XRay modes.
 set(XRAY_FDR_MODE_SOURCES
   xray_fdr_flags.cpp
@@ -33,6 +37,11 @@ set(x86_64_SOURCES
   xray_trampoline_x86_64.S
   )
 
+set(x86_64_DSO_SOURCES
+   xray_trampoline_x86_64.S
+   )
+
+
 set(arm_SOURCES
   xray_arm.cpp
   xray_trampoline_arm.S
@@ -128,10 +137,12 @@ set(XRAY_IMPL_HEADERS
 # consumption by tests.
 set(XRAY_ALL_SOURCE_FILES
   ${XRAY_SOURCES}
+  ${XRAY_DSO_SOURCES}
   ${XRAY_FDR_MODE_SOURCES}
   ${XRAY_BASIC_MODE_SOURCES}
   ${XRAY_PROFILING_MODE_SOURCES}
   ${x86_64_SOURCES}
+  ${x86_64_DSO_SOURCES}
   ${arm_SOURCES}
   ${armhf_SOURCES}
   ${hexagon_SOURCES}
@@ -162,6 +173,9 @@ set(XRAY_CFLAGS
   ${COMPILER_RT_CXX_CFLAGS})
 set(XRAY_COMMON_DEFINITIONS SANITIZER_COMMON_NO_REDEFINE_BUILTINS XRAY_HAS_EXCEPTIONS=1)
 
+# DSO trampolines need to be compiled with GOT addressing
+set(XRAY_COMMON_DEFINITIONS_DSO ${XRAY_COMMON_DEFINITIONS} XRAY_PIC)
+
 # Too many existing bugs, needs cleanup.
 append_list_if(COMPILER_RT_HAS_WNO_FORMAT -Wno-format XRAY_CFLAGS)
 
@@ -201,7 +215,16 @@ if (APPLE)
     CFLAGS ${XRAY_CFLAGS}
     DEFS ${XRAY_COMMON_DEFINITIONS}
     DEPS ${XRAY_DEPS})
+  add_compiler_rt_object_libraries(RTXrayDSO
+    OS ${XRAY_SUPPORTED_OS}
+    ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+    SOURCES ${XRAY_DSO_SOURCES}
+    ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+    CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+    DEPS ${XRAY_DEPS})
   set(XRAY_RTXRAY_ARCH_LIBS "")
+  set(XRAY_DSO_RTXRAY_ARCH_LIBS "")
   foreach(arch ${XRAY_SUPPORTED_ARCH})
     if(NOT ${arch} IN_LIST XRAY_SOURCE_ARCHS)
       continue()
@@ -215,6 +238,17 @@ if (APPLE)
       DEFS ${XRAY_COMMON_DEFINITIONS}
       DEPS ${XRAY_DEPS})
     list(APPEND XRAY_RTXRAY_ARCH_LIBS RTXray_${arch})
+    if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
+      add_compiler_rt_object_libraries(RTXrayDSO_${arch}
+        OS ${XRAY_SUPPORTED_OS}
+        ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+        SOURCES ${${arch}_DSO_SOURCES}
+        ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+        CFLAGS ${XRAY_CFLAGS}
+        DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+        DEPS ${XRAY_DEPS})
+      list(APPEND XRAY_DSO_RTXRAY_ARCH_LIBS RTXrayDSO_${arch})
+    endif()
   endforeach()
   add_compiler_rt_object_libraries(RTXrayFDR
     OS ${XRAY_SUPPORTED_OS}
@@ -252,6 +286,17 @@ if (APPLE)
     LINK_FLAGS ${XRAY_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
     LINK_LIBS ${XRAY_LINK_LIBS}
     PARENT_TARGET xray)
+  add_compiler_rt_runtime(clang_rt.xray-dso
+    STATIC
+    OS ${XRAY_SUPPORTED_OS}
+    ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+    OBJECT_LIBS RTXrayDSO ${XRAY_DSO_RTXRAY_ARCH_LIBS}
+    CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS}
+    LINK_FLAGS ${XRAY_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+    LINK_LIBS ${XRAY_LINK_LIBS}
+    PARENT_TARGET xray)
+
   add_compiler_rt_runtime(clang_rt.xray-fdr
     STATIC
     OS ${XRAY_SUPPORTED_OS}
@@ -346,16 +391,37 @@ else() # not Apple
       DEFS ${XRAY_COMMON_DEFINITIONS}
       OBJECT_LIBS RTXrayBASIC
       PARENT_TARGET xray)
-   # Profiler Mode runtime
-   add_compiler_rt_runtime(clang_rt.xray-profiling
-     STATIC
-     ARCHS ${arch}
-     CFLAGS ${XRAY_CFLAGS}
-     LINK_FLAGS ${XRAY_LINK_FLAGS}
-     LINK_LIBS ${XRAY_LINK_LIBS}
-     DEFS ${XRAY_COMMON_DEFINITIONS}
-     OBJECT_LIBS RTXrayPROFILING
-     PARENT_TARGET xray)
+    # Profiler Mode runtime
+    add_compiler_rt_runtime(clang_rt.xray-profiling
+      STATIC
+      ARCHS ${arch}
+      CFLAGS ${XRAY_CFLAGS}
+      LINK_FLAGS ${XRAY_LINK_FLAGS}
+      LINK_LIBS ${XRAY_LINK_LIBS}
+      DEFS ${XRAY_COMMON_DEFINITIONS}
+      OBJECT_LIBS RTXrayPROFILING
+      PARENT_TARGET xray)
+
+    if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
+      # TODO: Only implemented for X86 at the moment
+      add_compiler_rt_object_libraries(RTXrayDSO
+        ARCHS ${arch}
+        SOURCES ${XRAY_DSO_SOURCES} ${${arch}_DSO_SOURCES} 
+        ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+        CFLAGS ${XRAY_CFLAGS}
+        DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+        DEPS ${XRAY_DEPS})
+      # DSO runtime archive
+      add_compiler_rt_runtime(clang_rt.xray-dso
+        STATIC
+        ARCHS ${arch}
+        CFLAGS ${XRAY_CFLAGS}
+        LINK_FLAGS ${XRAY_LINK_FLAGS}
+        LINK_LIBS ${XRAY_LINK_LIBS}
+        DEFS ${XRAY_COMMON_DEFINITIONS}
+        OBJECT_LIBS RTXrayDSO
+        PARENT_TARGET xray)
+    endif()
   endforeach()
 endif() # not Apple
 

diff  --git a/compiler-rt/lib/xray/xray_dso_init.cpp b/compiler-rt/lib/xray/xray_dso_init.cpp
new file mode 100644
index 00000000000000..eb754db54c64fa
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_dso_init.cpp
@@ -0,0 +1,62 @@
+//===-- xray_dso_init.cpp ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic for DSOs.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+using namespace __sanitizer;
+
+extern "C" {
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+
+#if SANITIZER_APPLE
+// HACK: This is a temporary workaround to make XRay build on
+// Darwin, but it will probably not work at runtime.
+extern const XRaySledEntry __start_xray_instr_map[] = {};
+extern const XRaySledEntry __stop_xray_instr_map[] = {};
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {};
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {};
+#endif
+}
+
+// Handler functions to call in the patched entry/exit sled.
+extern atomic_uintptr_t XRayPatchedFunction;
+extern atomic_uintptr_t XRayArgLogger;
+extern atomic_uintptr_t XRayPatchedCustomEvent;
+extern atomic_uintptr_t XRayPatchedTypedEvent;
+
+static int __xray_object_id{-1};
+
+// Note: .preinit_array initialization does not work for DSOs
+__attribute__((constructor(0))) static void
+__xray_init_dso() XRAY_NEVER_INSTRUMENT {
+  // Register sleds in main XRay runtime.
+  __xray_object_id =
+      __xray_register_dso(__start_xray_instr_map, __stop_xray_instr_map,
+                          __start_xray_fn_idx, __stop_xray_fn_idx, {});
+}
+
+__attribute__((destructor(0))) static void
+__xray_finalize_dso() XRAY_NEVER_INSTRUMENT {
+  // Inform the main runtime that this DSO is no longer used.
+  __xray_deregister_dso(__xray_object_id);
+}

diff  --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index f22a31b95686d0..53c93be89cd148 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -16,6 +16,8 @@
 #include <unistd.h>
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray_allocator.h"
 #include "xray_defs.h"
 #include "xray_flags.h"
 #include "xray_interface_internal.h"
@@ -28,7 +30,7 @@ extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
 extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
 
 #if SANITIZER_APPLE
-// HACK: This is a temporary workaround to make XRay build on 
+// HACK: This is a temporary workaround to make XRay build on
 // Darwin, but it will probably not work at runtime.
 const XRaySledEntry __start_xray_instr_map[] = {};
 extern const XRaySledEntry __stop_xray_instr_map[] = {};
@@ -43,14 +45,16 @@ using namespace __xray;
 // the weak symbols defined above (__start_xray_inst_map and
 // __stop_xray_instr_map) to initialise the instrumentation map that XRay uses
 // for runtime patching/unpatching of instrumentation points.
-//
-// FIXME: Support DSO instrumentation maps too. The current solution only works
-// for statically linked executables.
 atomic_uint8_t XRayInitialized{0};
 
 // This should always be updated before XRayInitialized is updated.
 SpinMutex XRayInstrMapMutex;
-XRaySledMap XRayInstrMap;
+
+//  Contains maps for the main executable as well as DSOs.
+XRaySledMap *XRayInstrMaps;
+
+// Number of binary objects registered.
+atomic_uint32_t XRayNumObjects{0};
 
 // Global flag to determine whether the flags have been initialized.
 atomic_uint8_t XRayFlagsInitialized{0};
@@ -58,6 +62,63 @@ atomic_uint8_t XRayFlagsInitialized{0};
 // A mutex to allow only one thread to initialize the XRay data structures.
 SpinMutex XRayInitMutex;
 
+// Registers XRay sleds and trampolines coming from the main executable or one
+// of the linked DSOs.
+// Returns the object ID if registration is successful, -1 otherwise.
+int32_t
+__xray_register_sleds(const XRaySledEntry *SledsBegin,
+                      const XRaySledEntry *SledsEnd,
+                      const XRayFunctionSledIndex *FnIndexBegin,
+                      const XRayFunctionSledIndex *FnIndexEnd, bool FromDSO,
+                      XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
+  if (!SledsBegin || !SledsEnd) {
+    Report("Invalid XRay sleds.\n");
+    return -1;
+  }
+  XRaySledMap SledMap;
+  SledMap.FromDSO = FromDSO;
+  SledMap.Loaded = true;
+  SledMap.Trampolines = Trampolines;
+  SledMap.Sleds = SledsBegin;
+  SledMap.Entries = SledsEnd - SledsBegin;
+  if (FnIndexBegin != nullptr) {
+    SledMap.SledsIndex = FnIndexBegin;
+    SledMap.Functions = FnIndexEnd - FnIndexBegin;
+  } else {
+    size_t CountFunctions = 0;
+    uint64_t LastFnAddr = 0;
+
+    for (std::size_t I = 0; I < SledMap.Entries; I++) {
+      const auto &Sled = SledMap.Sleds[I];
+      const auto Function = Sled.function();
+      if (Function != LastFnAddr) {
+        CountFunctions++;
+        LastFnAddr = Function;
+      }
+    }
+    SledMap.SledsIndex = nullptr;
+    SledMap.Functions = CountFunctions;
+  }
+  if (SledMap.Functions >= XRayMaxFunctions) {
+    Report("Too many functions! Maximum is %ld\n", XRayMaxFunctions);
+    return -1;
+  }
+
+  if (Verbosity())
+    Report("Registering %d new functions!\n", SledMap.Functions);
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    auto Idx = atomic_fetch_add(&XRayNumObjects, 1, memory_order_acq_rel);
+    if (Idx >= XRayMaxObjects) {
+      Report("Too many objects registered! Maximum is %ld\n", XRayMaxObjects);
+      return -1;
+    }
+    XRayInstrMaps[Idx] = std::move(SledMap);
+    return Idx;
+  }
+}
+
 // __xray_init() will do the actual loading of the current process' memory map
 // and then proceed to look for the .xray_instr_map section/segment.
 void __xray_init() XRAY_NEVER_INSTRUMENT {
@@ -80,29 +141,21 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
     return;
   }
 
-  {
-    SpinMutexLock Guard(&XRayInstrMapMutex);
-    XRayInstrMap.Sleds = __start_xray_instr_map;
-    XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
-    if (__start_xray_fn_idx != nullptr) {
-      XRayInstrMap.SledsIndex = __start_xray_fn_idx;
-      XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx;
-    } else {
-      size_t CountFunctions = 0;
-      uint64_t LastFnAddr = 0;
-
-      for (std::size_t I = 0; I < XRayInstrMap.Entries; I++) {
-        const auto &Sled = XRayInstrMap.Sleds[I];
-        const auto Function = Sled.function();
-        if (Function != LastFnAddr) {
-          CountFunctions++;
-          LastFnAddr = Function;
-        }
-      }
+  atomic_store(&XRayNumObjects, 0, memory_order_release);
 
-      XRayInstrMap.Functions = CountFunctions;
-    }
+  // Pre-allocation takes up approx. 5kB for XRayMaxObjects=64.
+  XRayInstrMaps = allocateBuffer<XRaySledMap>(XRayMaxObjects);
+
+  int MainBinaryId =
+      __xray_register_sleds(__start_xray_instr_map, __stop_xray_instr_map,
+                            __start_xray_fn_idx, __stop_xray_fn_idx, false, {});
+
+  // The executable should always get ID 0.
+  if (MainBinaryId != 0) {
+    Report("Registering XRay sleds failed.\n");
+    return;
   }
+
   atomic_store(&XRayInitialized, true, memory_order_release);
 
 #ifndef XRAY_NO_PREINIT
@@ -111,6 +164,84 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
 #endif
 }
 
+// Registers XRay sleds and trampolines of an instrumented DSO.
+// Returns the object ID if registration is successful, -1 otherwise.
+//
+// Default visibility is hidden, so we have to explicitly make it visible to
+// DSO.
+SANITIZER_INTERFACE_ATTRIBUTE int32_t __xray_register_dso(
+    const XRaySledEntry *SledsBegin, const XRaySledEntry *SledsEnd,
+    const XRayFunctionSledIndex *FnIndexBegin,
+    const XRayFunctionSledIndex *FnIndexEnd,
+    XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
+  // Make sure XRay has been initialized in the main executable.
+  __xray_init();
+
+  if (__xray_num_objects() == 0) {
+    if (Verbosity())
+      Report("No XRay instrumentation map in main executable. Not initializing "
+             "XRay for DSO.\n");
+    return -1;
+  }
+
+  // Register sleds in global map.
+  int ObjId = __xray_register_sleds(SledsBegin, SledsEnd, FnIndexBegin,
+                                    FnIndexEnd, true, Trampolines);
+
+#ifndef XRAY_NO_PREINIT
+  if (ObjId >= 0 && flags()->patch_premain)
+    __xray_patch_object(ObjId);
+#endif
+
+  return ObjId;
+}
+
+// Deregisters a DSO from the main XRay runtime.
+// Called from the DSO-local runtime when the library is unloaded (e.g. if
+// dlclose is called).
+// Returns true if the object ID is valid and the DSO was successfully
+// deregistered.
+SANITIZER_INTERFACE_ATTRIBUTE bool
+__xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+
+  if (!atomic_load(&XRayInitialized, memory_order_acquire)) {
+    if (Verbosity())
+      Report("XRay has not been initialized. Cannot deregister DSO.\n");
+    return false;
+  }
+
+  if (ObjId <= 0 || ObjId >= __xray_num_objects()) {
+    if (Verbosity())
+      Report("Can't deregister object with ID %d: ID is invalid.\n", ObjId);
+    return false;
+  }
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    auto &Entry = XRayInstrMaps[ObjId];
+    if (!Entry.FromDSO) {
+      if (Verbosity())
+        Report("Can't deregister object with ID %d: object does not correspond "
+               "to a shared library.\n",
+               ObjId);
+      return false;
+    }
+    if (!Entry.Loaded) {
+      if (Verbosity())
+        Report("Can't deregister object with ID %d: object is not loaded.\n",
+               ObjId);
+      return true;
+    }
+    // Mark DSO as unloaded. No need to unpatch.
+    Entry.Loaded = false;
+  }
+
+  if (Verbosity())
+    Report("Deregistered object with ID %d.\n", ObjId);
+
+  return true;
+}
+
 // FIXME: Make check-xray tests work on FreeBSD without
 // SANITIZER_CAN_USE_PREINIT_ARRAY.
 // See sanitizer_internal_defs.h where the macro is defined.

diff  --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 5839043fcb93a8..16e60bfc22cd10 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "xray_interface_internal.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #include <cinttypes>
 #include <cstdio>
@@ -36,7 +37,8 @@
 
 extern __sanitizer::SpinMutex XRayInstrMapMutex;
 extern __sanitizer::atomic_uint8_t XRayInitialized;
-extern __xray::XRaySledMap XRayInstrMap;
+extern __xray::XRaySledMap *XRayInstrMaps;
+extern __sanitizer::atomic_uint32_t XRayNumObjects;
 
 namespace __xray {
 
@@ -61,16 +63,16 @@ static const int16_t cSledLength = 20;
 #endif /* CPU architecture */
 
 // This is the function to call when we encounter the entry or exit sleds.
-atomic_uintptr_t XRayPatchedFunction{0};
+atomic_uintptr_t XRayPatchedFunction SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call from the arg1-enabled sleds/trampolines.
-atomic_uintptr_t XRayArgLogger{0};
+atomic_uintptr_t XRayArgLogger SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call when we encounter a custom event log call.
-atomic_uintptr_t XRayPatchedCustomEvent{0};
+atomic_uintptr_t XRayPatchedCustomEvent SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call when we encounter a typed event log call.
-atomic_uintptr_t XRayPatchedTypedEvent{0};
+atomic_uintptr_t XRayPatchedTypedEvent SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the global status to determine whether we are currently
 // patching/unpatching.
@@ -150,27 +152,42 @@ class MProtectHelper {
 
 namespace {
 
-bool patchSled(const XRaySledEntry &Sled, bool Enable,
-               int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+bool isObjectLoaded(int32_t ObjId) {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (ObjId < 0 ||
+      ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+    return false;
+  }
+  return XRayInstrMaps[ObjId].Loaded;
+}
+
+bool patchSled(const XRaySledEntry &Sled, bool Enable, int32_t FuncId,
+               const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   bool Success = false;
   switch (Sled.Kind) {
   case XRayEntryType::ENTRY:
-    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+    Success =
+        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.EntryTrampoline);
     break;
   case XRayEntryType::EXIT:
-    Success = patchFunctionExit(Enable, FuncId, Sled);
+    Success =
+        patchFunctionExit(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
     break;
   case XRayEntryType::TAIL:
-    Success = patchFunctionTailExit(Enable, FuncId, Sled);
+    Success = patchFunctionTailExit(Enable, FuncId, Sled,
+                                    Trampolines.TailExitTrampoline);
     break;
   case XRayEntryType::LOG_ARGS_ENTRY:
-    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+    Success =
+        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.LogArgsTrampoline);
     break;
   case XRayEntryType::CUSTOM_EVENT:
-    Success = patchCustomEvent(Enable, FuncId, Sled);
+    Success = patchCustomEvent(Enable, FuncId, Sled,
+                               Trampolines.CustomEventTrampoline);
     break;
   case XRayEntryType::TYPED_EVENT:
-    Success = patchTypedEvent(Enable, FuncId, Sled);
+    Success =
+        patchTypedEvent(Enable, FuncId, Sled, Trampolines.TypedEventTrampoline);
     break;
   default:
     Report("Unsupported sled kind '%" PRIu64 "' @%04x\n", Sled.Address,
@@ -205,10 +222,9 @@ findFunctionSleds(int32_t FuncId,
   return Index;
 }
 
-XRayPatchingStatus patchFunction(int32_t FuncId,
+XRayPatchingStatus patchFunction(int32_t FuncId, int32_t ObjId,
                                  bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
 
   uint8_t NotPatching = false;
@@ -220,13 +236,24 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch function: invalid sled map index: %d", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
 
   // If we don't have an index, we can't patch individual functions.
   if (InstrMap.Functions == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Invalid function id provided: %d\n", FuncId);
+    return XRayPatchingStatus::NOT_INITIALIZED;
+  }
+
   // FuncId must be a positive number, less than the number of functions
   // instrumented.
   if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
@@ -234,6 +261,8 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
     return XRayPatchingStatus::FAILED;
   }
 
+  auto PackedId = __xray::MakePackedId(FuncId, ObjId);
+
   // Now we patch ths sleds for this specific function.
   XRayFunctionSledIndex SledRange;
   if (InstrMap.SledsIndex) {
@@ -242,13 +271,13 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
   } else {
     SledRange = findFunctionSleds(FuncId, InstrMap);
   }
+
   auto *f = SledRange.Begin;
   bool SucceedOnce = false;
   for (size_t i = 0; i != SledRange.Size; ++i)
-    SucceedOnce |= patchSled(f[i], Enable, FuncId);
+    SucceedOnce |= patchSled(f[i], Enable, PackedId, InstrMap.Trampolines);
 
-  atomic_store(&XRayPatching, false,
-                            memory_order_release);
+  atomic_store(&XRayPatching, false, memory_order_release);
 
   if (!SucceedOnce) {
     Report("Failed patching any sled for function '%d'.", FuncId);
@@ -261,32 +290,31 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
 // controlPatching implements the common internals of the patching/unpatching
 // implementation. |Enable| defines whether we're enabling or disabling the
 // runtime XRay instrumentation.
-XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
-    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
-
-  uint8_t NotPatching = false;
-  if (!atomic_compare_exchange_strong(
-          &XRayPatching, &NotPatching, true, memory_order_acq_rel))
-    return XRayPatchingStatus::ONGOING; // Already patching.
-
-  uint8_t PatchingSuccess = false;
-  auto XRayPatchingStatusResetter =
-      at_scope_exit([&PatchingSuccess] {
-        if (!PatchingSuccess)
-          atomic_store(&XRayPatching, false,
-                                    memory_order_release);
-      });
-
+// This function should only be called after ensuring that XRay is initialized
+// and no other thread is currently patching.
+XRayPatchingStatus controlPatchingObjectUnchecked(bool Enable, int32_t ObjId) {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch functions: invalid sled map index: %d\n", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
   if (InstrMap.Entries == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
+  if (Verbosity())
+    Report("Patching object %d with %d functions.\n", ObjId, InstrMap.Entries);
+
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Object is not loaded at index: %d\n", ObjId);
+    return XRayPatchingStatus::FAILED;
+  }
+
   uint32_t FuncId = 1;
   uint64_t CurFun = 0;
 
@@ -336,20 +364,96 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
       ++FuncId;
       CurFun = F;
     }
-    patchSled(Sled, Enable, FuncId);
+    auto PackedId = __xray::MakePackedId(FuncId, ObjId);
+    patchSled(Sled, Enable, PackedId, InstrMap.Trampolines);
   }
-  atomic_store(&XRayPatching, false,
-                            memory_order_release);
-  PatchingSuccess = true;
+  atomic_store(&XRayPatching, false, memory_order_release);
   return XRayPatchingStatus::SUCCESS;
 }
 
-XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
+// Controls patching for all registered objects.
+// Returns: SUCCESS, if patching succeeds for all objects.
+//          NOT_INITIALIZED, if one or more objects returned NOT_INITIALIZED
+//             but none failed.
+//          FAILED, if patching of one or more objects failed.
+XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(&XRayPatching, &NotPatching, true,
+                                      memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  auto XRayPatchingStatusResetter = at_scope_exit(
+      [] { atomic_store(&XRayPatching, false, memory_order_release); });
+
+  unsigned NumObjects = __xray_num_objects();
+
+  XRayPatchingStatus CombinedStatus{NOT_INITIALIZED};
+  for (unsigned I = 0; I < NumObjects; ++I) {
+    if (!isObjectLoaded(I))
+      continue;
+    auto LastStatus = controlPatchingObjectUnchecked(Enable, I);
+    switch (LastStatus) {
+    case SUCCESS:
+      if (CombinedStatus == NOT_INITIALIZED)
+        CombinedStatus = SUCCESS;
+      break;
+    case FAILED:
+      // Report failure, but try to patch the remaining objects
+      CombinedStatus = FAILED;
+      break;
+    case NOT_INITIALIZED:
+      // XRay has been initialized but there are no sleds available for this
+      // object. Try to patch remaining objects.
+      if (CombinedStatus != FAILED)
+        CombinedStatus = NOT_INITIALIZED;
+      break;
+    case ONGOING:
+      llvm_unreachable("Status ONGOING should not appear at this point");
+    default:
+      llvm_unreachable("Unhandled patching status");
+    }
+  }
+  return CombinedStatus;
+}
+
+// Controls patching for one object.
+XRayPatchingStatus controlPatching(bool Enable,
+                                   int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(&XRayPatching, &NotPatching, true,
+                                      memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  auto XRayPatchingStatusResetter = at_scope_exit(
+      [] { atomic_store(&XRayPatching, false, memory_order_release); });
+
+  return controlPatchingObjectUnchecked(Enable, ObjId);
+}
+
+XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, int32_t ObjId,
                                             bool Enable) XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch function: invalid sled map index: %d\n", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
+  }
+
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Object is not loaded at index: %d\n", ObjId);
+    return XRayPatchingStatus::FAILED;
   }
 
   // FuncId must be a positive number, less than the number of functions
@@ -398,7 +502,7 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
     Report("Failed mprotect: %d\n", errno);
     return XRayPatchingStatus::FAILED;
   }
-  return patchFunction(FuncId, Enable);
+  return patchFunction(FuncId, ObjId, Enable);
 }
 
 } // namespace
@@ -412,12 +516,10 @@ using namespace __xray;
 
 int __xray_set_handler(void (*entry)(int32_t,
                                      XRayEntryType)) XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
 
     atomic_store(&__xray::XRayPatchedFunction,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -425,11 +527,9 @@ int __xray_set_handler(void (*entry)(int32_t,
 
 int __xray_set_customevent_handler(void (*entry)(void *, size_t))
     XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
     atomic_store(&__xray::XRayPatchedCustomEvent,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -437,11 +537,9 @@ int __xray_set_customevent_handler(void (*entry)(void *, size_t))
 
 int __xray_set_typedevent_handler(void (*entry)(size_t, const void *,
                                                 size_t)) XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
     atomic_store(&__xray::XRayPatchedTypedEvent,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -474,39 +572,78 @@ XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
   return controlPatching(true);
 }
 
+XRayPatchingStatus __xray_patch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return controlPatching(true, ObjId);
+}
+
 XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
   return controlPatching(false);
 }
 
+XRayPatchingStatus __xray_unpatch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return controlPatching(false, ObjId);
+}
+
 XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
-  return mprotectAndPatchFunction(FuncId, true);
+  auto Ids = __xray::UnpackId(FuncId);
+  auto ObjId = Ids.first;
+  auto FnId = Ids.second;
+  return mprotectAndPatchFunction(FnId, ObjId, true);
+}
+
+XRayPatchingStatus
+__xray_patch_function_in_object(int32_t FuncId,
+                                int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return mprotectAndPatchFunction(FuncId, ObjId, true);
 }
 
 XRayPatchingStatus
 __xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
-  return mprotectAndPatchFunction(FuncId, false);
+  auto Ids = __xray::UnpackId(FuncId);
+  auto ObjId = Ids.first;
+  auto FnId = Ids.second;
+  return mprotectAndPatchFunction(FnId, ObjId, false);
+}
+
+XRayPatchingStatus
+__xray_unpatch_function_in_object(int32_t FuncId,
+                                  int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return mprotectAndPatchFunction(FuncId, ObjId, false);
 }
 
 int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return 0;
 
   // A relaxed write might not be visible even if the current thread gets
   // scheduled on a different CPU/NUMA node.  We need to wait for everyone to
   // have this handler installed for consistency of collected data across CPUs.
   atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry),
-                            memory_order_release);
+               memory_order_release);
   return 1;
 }
 
 int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
 
-uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+uintptr_t
+__xray_function_address(int32_t CombinedFuncId) XRAY_NEVER_INSTRUMENT {
+  auto Ids = __xray::UnpackId(CombinedFuncId);
+  return __xray_function_address_in_object(Ids.second, Ids.first);
+}
+
+uintptr_t __xray_function_address_in_object(int32_t FuncId, int32_t ObjId)
+    XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    auto count = atomic_load(&XRayNumObjects, memory_order_acquire);
+    if (ObjId < 0 || ObjId >= count) {
+      Report("Unable to determine function address: invalid sled map index %d "
+             "(size is %d)\n",
+             ObjId, (int)count);
+      return 0;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
 
   if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions)
@@ -525,6 +662,29 @@ uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
 }
 
 size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
+  return __xray_max_function_id_in_object(0);
+}
+
+size_t __xray_max_function_id_in_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (ObjId < 0 || ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire))
+    return 0;
+  return XRayInstrMaps[ObjId].Functions;
+}
+
+size_t __xray_num_objects() XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Guard(&XRayInstrMapMutex);
-  return XRayInstrMap.Functions;
+  return atomic_load(&XRayNumObjects, memory_order_acquire);
+}
+
+int32_t __xray_unpack_function_id(int32_t PackedId) {
+  return __xray::UnpackId(PackedId).second;
+}
+
+int32_t __xray_unpack_object_id(int32_t PackedId) {
+  return __xray::UnpackId(PackedId).first;
+}
+
+int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId) {
+  return __xray::MakePackedId(FuncId, ObjId);
 }

diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index 80c07c167f6461..5fbaa9c3f315b1 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -18,6 +18,18 @@
 #include "xray/xray_interface.h"
 #include <cstddef>
 #include <cstdint>
+#include <utility>
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+extern void __xray_FunctionTailExit();
+extern void __xray_ArgLoggerEntry();
+extern void __xray_CustomEvent();
+extern void __xray_TypedEvent();
+}
 
 extern "C" {
 
@@ -67,36 +79,77 @@ struct XRayFunctionSledIndex {
                                                    uintptr_t(Begin));
   }
 };
+
+struct XRayTrampolines {
+  void (*EntryTrampoline)();
+  void (*ExitTrampoline)();
+  void (*TailExitTrampoline)();
+  void (*LogArgsTrampoline)();
+  void (*CustomEventTrampoline)();
+  void (*TypedEventTrampoline)();
+
+  XRayTrampolines() {
+    // These resolve to the definitions in the respective executable or DSO.
+    EntryTrampoline = __xray_FunctionEntry;
+    ExitTrampoline = __xray_FunctionExit;
+    TailExitTrampoline = __xray_FunctionTailExit;
+    LogArgsTrampoline = __xray_ArgLoggerEntry;
+    CustomEventTrampoline = __xray_CustomEvent;
+    TypedEventTrampoline = __xray_TypedEvent;
+  }
+};
+
+extern int32_t __xray_register_dso(const XRaySledEntry *SledsBegin,
+                                   const XRaySledEntry *SledsEnd,
+                                   const XRayFunctionSledIndex *FnIndexBegin,
+                                   const XRayFunctionSledIndex *FnIndexEnd,
+                                   XRayTrampolines Trampolines);
+
+extern bool __xray_deregister_dso(int32_t ObjId);
 }
 
 namespace __xray {
 
+constexpr uint32_t XRayNFnBits = 24;
+constexpr uint32_t XRayNObjBits = 8;
+
+constexpr uint32_t XRayFnBitMask = 0x00FFFFFF;
+constexpr uint32_t XRayObjBitMask = 0xFF000000;
+
+constexpr size_t XRayMaxFunctions = 1 << XRayNFnBits;
+constexpr size_t XRayMaxObjects = 1 << XRayNObjBits;
+
+inline int32_t MakePackedId(int32_t FnId, int32_t ObjId) {
+  return ((ObjId << XRayNFnBits) & XRayObjBitMask) | (FnId & XRayFnBitMask);
+}
+
+inline std::pair<int32_t, int32_t> UnpackId(int32_t PackedId) {
+  uint32_t ObjId = (PackedId & XRayObjBitMask) >> XRayNFnBits;
+  uint32_t FnId = PackedId & XRayFnBitMask;
+  return {ObjId, FnId};
+}
+
 struct XRaySledMap {
   const XRaySledEntry *Sleds;
   size_t Entries;
   const XRayFunctionSledIndex *SledsIndex;
   size_t Functions;
+  XRayTrampolines Trampolines;
+  bool FromDSO;
+  bool Loaded;
 };
 
 bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
                         void (*Trampoline)());
-bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                       void (*Trampoline)());
 bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
-                           const XRaySledEntry &Sled);
-bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
-bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+                           const XRaySledEntry &Sled, void (*Trampoline)());
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                      void (*Trampoline)());
+bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                     void (*Trampoline)());
 
 } // namespace __xray
 
-extern "C" {
-// The following functions have to be defined in assembler, on a per-platform
-// basis. See xray_trampoline_*.S files for implementations.
-extern void __xray_FunctionEntry();
-extern void __xray_FunctionExit();
-extern void __xray_FunctionTailExit();
-extern void __xray_ArgLoggerEntry();
-extern void __xray_CustomEvent();
-extern void __xray_TypedEvent();
-}
-
 #endif

diff --git a/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
index 01098f60eeab8b..0f480547b52cc6 100644
--- a/compiler-rt/lib/xray/xray_trampoline_x86_64.S
+++ b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -107,6 +107,16 @@
 	.section __TEXT,__text
 #endif
 
+.macro LOAD_HANDLER_ADDR handler
+#if !defined(XRAY_PIC)
+	movq	ASM_SYMBOL(\handler)(%rip), %rax
+#else
+	movq	ASM_SYMBOL(\handler)@GOTPCREL(%rip), %rax
+	movq	(%rax), %rax
+#endif
+.endm
+
+
 //===----------------------------------------------------------------------===//
 
 	.globl ASM_SYMBOL(__xray_FunctionEntry)
@@ -121,7 +131,7 @@ ASM_SYMBOL(__xray_FunctionEntry):
 
 	// This load has to be atomic, it's concurrent with __xray_patch().
 	// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq	%rax, %rax
 	je	LOCAL_LABEL(tmp0)
 
@@ -159,7 +169,7 @@ ASM_SYMBOL(__xray_FunctionExit):
 	movupd	%xmm1, 16(%rsp)
 	movq	%rax, 8(%rsp)
 	movq	%rdx, 0(%rsp)
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq %rax,%rax
 	je	LOCAL_LABEL(tmp2)
 
@@ -195,7 +205,7 @@ ASM_SYMBOL(__xray_FunctionTailExit):
 	SAVE_REGISTERS
 	ALIGN_STACK_16B
 
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq %rax,%rax
 	je	LOCAL_LABEL(tmp4)
 
@@ -224,12 +234,12 @@ ASM_SYMBOL(__xray_ArgLoggerEntry):
 	ALIGN_STACK_16B
 
 	// Again, these function pointer loads must be atomic; MOV is fine.
-	movq	ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray13XRayArgLoggerE
 	testq	%rax, %rax
 	jne	LOCAL_LABEL(arg1entryLog)
 
 	// If [arg1 logging handler] not set, defer to no-arg logging.
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq	%rax, %rax
 	je	LOCAL_LABEL(arg1entryFail)
 
@@ -268,7 +278,7 @@ ASM_SYMBOL(__xray_CustomEvent):
 
 	// We take two arguments to this trampoline, which should be in rdi	and rsi
 	// already.
-	movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray22XRayPatchedCustomEventE
 	testq %rax,%rax
 	je LOCAL_LABEL(customEventCleanup)
 
@@ -293,7 +303,7 @@ ASM_SYMBOL(__xray_TypedEvent):
 
 	// We pass three arguments to this trampoline, which should be in rdi, rsi
 	// and rdx without our intervention.
-	movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray21XRayPatchedTypedEventE
 	testq %rax,%rax
 	je LOCAL_LABEL(typedEventCleanup)
 

diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp
index b9666a40861d48..663a51b2686614 100644
--- a/compiler-rt/lib/xray/xray_x86_64.cpp
+++ b/compiler-rt/lib/xray/xray_x86_64.cpp
@@ -170,7 +170,8 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                       const XRaySledEntry &Sled,
+                       void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -192,11 +193,11 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
   // Prerequisite is to compute the relative offset fo the
   // __xray_FunctionExit function's address.
   const uint64_t Address = Sled.address();
-  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                              (static_cast<int64_t>(Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
-           reinterpret_cast<void *>(__xray_FunctionExit),
+           reinterpret_cast<void *>(Trampoline),
            reinterpret_cast<void *>(Address));
     return false;
   }
@@ -217,16 +218,16 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                           const XRaySledEntry &Sled,
+                           void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the tail call sled with a similar
   // sequence as the entry sled, but calls the tail exit sled instead.
   const uint64_t Address = Sled.address();
-  int64_t TrampolineOffset =
-      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
-      (static_cast<int64_t>(Address) + 11);
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
+                             (static_cast<int64_t>(Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
-           reinterpret_cast<void *>(__xray_FunctionTailExit),
+           reinterpret_cast<void *>(Trampoline),
            reinterpret_cast<void *>(Address));
     return false;
   }
@@ -247,7 +248,8 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                      const XRaySledEntry &Sled,
+                      void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -275,7 +277,8 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                     const XRaySledEntry &Sled,
+                     void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:

diff --git a/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
new file mode 100644
index 00000000000000..31c615bd1f81bf
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
@@ -0,0 +1,47 @@
+// Testing shared library support in basic logging mode.
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=false,xray_mode=xray-basic,xray_logfile_base=basic-mode-dso-,verbosity=1" XRAY_BASIC_OPTIONS="func_duration_threshold_us=0" %run %t/main.o 2>&1 | FileCheck %s
+// RUN: %llvm_xray account --format=csv --sort=funcid "`ls basic-mode-dso-* | head -1`" | FileCheck --check-prefix=ACCOUNT %s
+// RUN: rm basic-mode-dso-*
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <unistd.h>
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+  sleep(1);
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  // Explicit patching to ensure the DSO has been loaded
+  __xray_patch();
+  instrumented_in_executable();
+  // CHECK: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+#include <unistd.h>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
+
+// ACCOUNT: funcid,count,min,median,90%ile,99%ile,max,sum,debug,function
+// ACCOUNT-NEXT: 1,1,{{.*}}
+// ACCOUNT-NEXT: 16777217,1,{{.*}}

diff --git a/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp b/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
new file mode 100644
index 00000000000000..92f3c29e970d42
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
@@ -0,0 +1,14 @@
+// Test that the DSO-local runtime library has been linked if -fxray-shared is passed.
+//
+// RUN: %clangxx -fxray-instrument -fxray-shared %s -shared -o %t.so
+// RUN: llvm-nm %t.so | FileCheck %s --check-prefix ENABLED
+
+// RUN: %clangxx -fxray-instrument %s -shared -o %t.so
+// RUN: llvm-nm %t.so | FileCheck %s --check-prefix DISABLED
+//
+// REQUIRES: target=x86_64{{.*}}
+
+[[clang::xray_always_instrument]] int always_instrumented() { return 42; }
+
+// ENABLED: __start_xray_instr_map
+// DISABLED-NOT: __start_xray_instr_map

diff --git a/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
new file mode 100644
index 00000000000000..9db411d5ff1c6e
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
@@ -0,0 +1,107 @@
+// Check that we can patch and un-patch DSOs loaded with dlopen.
+//
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -rdynamic -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp -o %t/main.o
+//
+// RUN: XRAY_OPTIONS="patch_premain=true" %run %t/main.o %t/testlib.so 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <dlfcn.h>
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+typedef void (*dso_func_type)();
+
+int main(int argc, char **argv) {
+  if (argc < 2) {
+    printf("Shared library argument missing\n");
+    // CHECK-NOT: Shared library argument missing
+    return 1;
+  }
+
+  const char *dso_path = argv[1];
+
+  void *dso_handle = dlopen(dso_path, RTLD_LAZY);
+  if (!dso_handle) {
+    printf("Failed to load shared library\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+      return 1;
+    }
+    return 1;
+  }
+
+  dso_func_type instrumented_in_dso =
+      (dso_func_type)dlsym(dso_handle, "_Z19instrumented_in_dsov");
+  if (!instrumented_in_dso) {
+    printf("Failed to find symbol\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+      return 1;
+    }
+    return 1;
+  }
+
+  __xray_set_handler(test_handler);
+
+  instrumented_in_executable();
+  // CHECK: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+
+  auto status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+
+  status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+
+  instrumented_in_executable();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+
+  dlclose(dso_handle);
+
+  status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}

diff --git a/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp b/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp
new file mode 100644
index 00000000000000..89da2764c35cee
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp
@@ -0,0 +1,197 @@
+// Check that loading libraries with different modes (RTLD_LOCAL/RTLD_GLOBAL)
+// and dependencies on other DSOs work correctly.
+//
+
+// RUN: split-file %s %t
+//
+// Build shared libs with dependencies b->c and e->f
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testliba.cpp -o %t/testliba.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibc.cpp -o %t/testlibc.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibb.cpp %t/testlibc.so -o %t/testlibb.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibd.cpp -o %t/testlibd.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibf.cpp -o %t/testlibf.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlibe.cpp %t/testlibf.so -o %t/testlibe.so
+//
+// Executable links with a and b explicitly and loads d and e at runtime.
+// RUN: %clangxx_xray -g -fPIC -rdynamic -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testliba.so %t/testlibb.so -o %t/main.o
+//
+// RUN:  XRAY_OPTIONS="patch_premain=true" %run %t/main.o %t/testlibd.so %t/testlibe.so 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <dlfcn.h>
+
+[[clang::xray_never_instrument]] void test_handler(int32_t fid,
+                                                   XRayEntryType type) {
+  printf("called: %d, object=%d, fn=%d, type=%d\n", fid, (fid >> 24) & 0xFF,
+         fid & 0x00FFFFFF, static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+typedef void (*dso_func_type)();
+
+[[clang::xray_never_instrument]] void *load_dso(const char *path, int mode) {
+  void *dso_handle = dlopen(path, mode);
+  if (!dso_handle) {
+    printf("failed to load shared library\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+    }
+    return nullptr;
+  }
+  return dso_handle;
+}
+
+[[clang::xray_never_instrument]] void find_and_call(void *dso_handle,
+                                                    const char *fn) {
+  dso_func_type dso_fn = (dso_func_type)dlsym(dso_handle, fn);
+  if (!dso_fn) {
+    printf("failed to find symbol\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+    }
+    return;
+  }
+  dso_fn();
+}
+
+extern void a();
+extern void b();
+
+int main(int argc, char **argv) {
+
+  if (argc < 3) {
+    printf("Shared library arguments missing\n");
+    // CHECK-NOT: Shared library arguments missing
+    return 1;
+  }
+
+  const char *dso_path_d = argv[1];
+  const char *dso_path_e = argv[2];
+
+  __xray_set_handler(test_handler);
+
+  instrumented_in_executable();
+  // CHECK: called: {{[0-9]+}}, object=0, fn={{[0-9]+}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=0, fn={{[0-9]+}}, type=1
+
+  a();
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ1:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: a called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ1]], fn=1, type=1
+
+  // Make sure this object ID does not appear again
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ1]]
+
+  b(); // b calls c
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ2:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: b called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ3:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: c called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ3]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ3]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ2]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ2]]
+
+  // Now check explicit loading with RTLD_LOCAL
+
+  void *dso_handle_d = load_dso(dso_path_d, RTLD_LAZY | RTLD_LOCAL);
+  void *dso_handle_e = load_dso(dso_path_e, RTLD_LAZY | RTLD_LOCAL);
+  // CHECK-NOT: failed to load shared library
+
+  find_and_call(dso_handle_d, "_Z1dv");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ4:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: d called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ4]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ4]]
+
+  find_and_call(dso_handle_e, "_Z1ev");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ5:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: e called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ6:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: f called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ6]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ6]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ5]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ5]]
+
+  // Unload DSOs
+  dlclose(dso_handle_d);
+  dlclose(dso_handle_e);
+
+  // Repeat test with RTLD_GLOBAL
+  dso_handle_d = load_dso(dso_path_d, RTLD_LAZY | RTLD_GLOBAL);
+  dso_handle_e = load_dso(dso_path_e, RTLD_LAZY | RTLD_GLOBAL);
+  // CHECK-NOT: failed to load shared library
+
+  find_and_call(dso_handle_d, "_Z1dv");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ7:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: d called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ7]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ7]]
+
+  find_and_call(dso_handle_e, "_Z1ev");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ8:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: e called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ9:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: f called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ9]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ9]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ8]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ8]]
+
+  auto status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+
+  dlclose(dso_handle_d);
+  dlclose(dso_handle_e);
+}
+
+//--- libgenmacro.inc
+#include <cstdio>
+// Helper macros to quickly generate libraries containing a single function.
+#define GENERATE_LIB(NAME)                                                     \
+  [[clang::xray_always_instrument]] void NAME() { printf(#NAME " called\n"); }
+
+#define GENERATE_LIB_WITH_CALL(NAME, FN)                                       \
+  extern void FN();                                                            \
+  [[clang::xray_always_instrument]] void NAME() {                              \
+    printf(#NAME " called\n");                                                 \
+    FN();                                                                      \
+  }
+
+//--- testliba.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(a)
+
+//--- testlibb.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB_WITH_CALL(b, c)
+
+//--- testlibc.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(c)
+
+//--- testlibd.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(d)
+
+//--- testlibe.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB_WITH_CALL(e, f)
+
+//--- testlibf.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(f)

diff --git a/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
new file mode 100644
index 00000000000000..0708d0383439d0
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
@@ -0,0 +1,45 @@
+// Checking that DSOs are automatically patched upon load, if patch_premain is passed.
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=true,verbosity=1" %run %t/main.o 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  __xray_set_handler(test_handler);
+  instrumented_in_executable();
+  // CHECK: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}

diff --git a/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp
new file mode 100644
index 00000000000000..d3e992dd497725
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp
@@ -0,0 +1,75 @@
+// Check that we can patch and un-patch on demand, and that logging gets invoked
+// appropriately.
+//
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=false" %run %t/main.o 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+bool called = false;
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+  called = true;
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  __xray_set_handler(test_handler);
+  instrumented_in_executable();
+  // CHECK: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK: instrumented_in_dso called
+  auto status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  instrumented_in_executable();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  status = __xray_unpatch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+  status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  __xray_remove_handler();
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+  status = __xray_unpatch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}


        


More information about the cfe-commits mailing list