[clang] [compiler-rt] [XRay] Add support for instrumentation of DSOs on x86_64 (PR #90959)

Sebastian Kreutzer via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 10 06:26:35 PDT 2024


https://github.com/sebastiankreutzer updated https://github.com/llvm/llvm-project/pull/90959

>From 1f0484b73ad0bb9b40e3cd86d8abad4af14e32dc Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Thu, 26 Oct 2023 15:13:05 +0200
Subject: [PATCH 1/7] [XRay] Add DSO support for XRay instrumentation on X86_64

---
 clang/include/clang/Driver/Options.td         |   4 +
 clang/include/clang/Driver/XRayArgs.h         |   4 +
 clang/lib/Driver/ToolChains/CommonArgs.cpp    |  12 +-
 clang/lib/Driver/XRayArgs.cpp                 |   7 +
 .../cmake/Modules/AllSupportedArchDefs.cmake  |   1 +
 compiler-rt/cmake/config-ix.cmake             |   4 +
 compiler-rt/include/xray/xray_interface.h     |  23 ++
 compiler-rt/lib/xray/CMakeLists.txt           |  73 ++++-
 compiler-rt/lib/xray/xray_dso_init.cpp        |  62 +++++
 compiler-rt/lib/xray/xray_init.cpp            | 158 +++++++++--
 compiler-rt/lib/xray/xray_interface.cpp       | 261 ++++++++++++++----
 .../lib/xray/xray_interface_internal.h        |  83 +++++-
 compiler-rt/lib/xray/xray_trampoline_x86_64.S |  24 +-
 compiler-rt/lib/xray/xray_x86_64.cpp          |  23 +-
 .../xray/TestCases/Posix/basic-mode-dso.cpp   |  47 ++++
 .../TestCases/Posix/clang-enable-shared.cpp   |  14 +
 .../test/xray/TestCases/Posix/dlopen.cpp      | 110 ++++++++
 .../TestCases/Posix/patch-premain-dso.cpp     |  45 +++
 .../Posix/patching-unpatching-dso.cpp         |  75 +++++
 19 files changed, 912 insertions(+), 118 deletions(-)
 create mode 100644 compiler-rt/lib/xray/xray_dso_init.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 953f6fc649e62..3e3be5475c0c4 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2850,6 +2850,10 @@ def fxray_selected_function_group :
   HelpText<"When using -fxray-function-groups, select which group of functions to instrument. Valid range is 0 to fxray-function-groups - 1">,
   MarshallingInfoInt<CodeGenOpts<"XRaySelectedFunctionGroup">, "0">;
 
+def fxray_enable_shared : Flag<["-"], "fxray-enable-shared">, Group<f_Group>,  Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Enable shared library instrumentation.">;
+def fno_xray_enable_shared : Flag<["-"], "fno-xray-enable-shared">, Group<f_Group>,
+  Visibility<[ClangOption, CC1Option]>;
 
 defm fine_grained_bitfield_accesses : BoolOption<"f", "fine-grained-bitfield-accesses",
   CodeGenOpts<"FineGrainedBitfieldAccesses">, DefaultFalse,
diff --git a/clang/include/clang/Driver/XRayArgs.h b/clang/include/clang/Driver/XRayArgs.h
index bdd3d979547ee..90a21e6958603 100644
--- a/clang/include/clang/Driver/XRayArgs.h
+++ b/clang/include/clang/Driver/XRayArgs.h
@@ -27,6 +27,7 @@ class XRayArgs {
   XRayInstrSet InstrumentationBundle;
   llvm::opt::Arg *XRayInstrument = nullptr;
   bool XRayRT = true;
+  bool XRayEnableShared = false;
 
 public:
   /// Parses the XRay arguments from an argument list.
@@ -35,6 +36,9 @@ class XRayArgs {
                llvm::opt::ArgStringList &CmdArgs, types::ID InputType) const;
 
   bool needsXRayRt() const { return XRayInstrument && XRayRT; }
+  bool needsXRayDSORt() const {
+    return XRayInstrument && XRayRT && XRayEnableShared;
+  }
   llvm::ArrayRef<std::string> modeList() const { return Modes; }
   XRayInstrSet instrumentationBundle() const { return InstrumentationBundle; }
 };
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 6796b43a15502..399bf795ce394 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1520,10 +1520,14 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
 }
 
 bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
-  if (Args.hasArg(options::OPT_shared))
-    return false;
-
-  if (TC.getXRayArgs().needsXRayRt()) {
+  if (Args.hasArg(options::OPT_shared)) {
+    if (TC.getXRayArgs().needsXRayDSORt()) {
+      CmdArgs.push_back("-whole-archive");
+      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray-dso"));
+      CmdArgs.push_back("-no-whole-archive");
+      return true;
+    }
+  } else if (TC.getXRayArgs().needsXRayRt()) {
     CmdArgs.push_back("--whole-archive");
     CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray"));
     for (const auto &Mode : TC.getXRayArgs().modeList())
diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp
index 8c5134e250135..7809cd7ef7c75 100644
--- a/clang/lib/Driver/XRayArgs.cpp
+++ b/clang/lib/Driver/XRayArgs.cpp
@@ -63,6 +63,10 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
         << XRayInstrument->getSpelling() << Triple.str();
   }
 
+  if (Args.hasFlag(options::OPT_fxray_enable_shared,
+                   options::OPT_fno_xray_enable_shared, false))
+    XRayEnableShared = true;
+
   // Both XRay and -fpatchable-function-entry use
   // TargetOpcode::PATCHABLE_FUNCTION_ENTER.
   if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ))
@@ -177,6 +181,9 @@ void XRayArgs::addArgs(const ToolChain &TC, const ArgList &Args,
   Args.addOptOutFlag(CmdArgs, options::OPT_fxray_function_index,
                      options::OPT_fno_xray_function_index);
 
+  if (XRayEnableShared)
+    CmdArgs.push_back("-fxray-enable-shared");
+
   if (const Arg *A =
           Args.getLastArg(options::OPT_fxray_instruction_threshold_EQ)) {
     int Value;
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index 2fe06273a814c..ea87edceb1b4a 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -84,6 +84,7 @@ else()
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
 		powerpc64le ${HEXAGON} ${LOONGARCH64})
 endif()
+set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64})
 set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
 
 if (UNIX)
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index ba740af9e1d60..1b20015576532 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -656,6 +656,9 @@ if(APPLE)
   list_intersect(XRAY_SUPPORTED_ARCH
     ALL_XRAY_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(XRAY_DSO_SUPPORTED_ARCH
+    ALL_XRAY_DSO_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
   list_intersect(SHADOWCALLSTACK_SUPPORTED_ARCH
     ALL_SHADOWCALLSTACK_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
@@ -689,6 +692,7 @@ else()
   filter_available_targets(CFI_SUPPORTED_ARCH ${ALL_CFI_SUPPORTED_ARCH})
   filter_available_targets(SCUDO_STANDALONE_SUPPORTED_ARCH ${ALL_SCUDO_STANDALONE_SUPPORTED_ARCH})
   filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
+  filter_available_targets(XRAY_DSO_SUPPORTED_ARCH ${ALL_XRAY_DSO_SUPPORTED_ARCH})
   filter_available_targets(SHADOWCALLSTACK_SUPPORTED_ARCH
     ${ALL_SHADOWCALLSTACK_SUPPORTED_ARCH})
   filter_available_targets(GWP_ASAN_SUPPORTED_ARCH ${ALL_GWP_ASAN_SUPPORTED_ARCH})
diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
index 727431c04e4f7..19c0f6b23175d 100644
--- a/compiler-rt/include/xray/xray_interface.h
+++ b/compiler-rt/include/xray/xray_interface.h
@@ -97,27 +97,50 @@ enum XRayPatchingStatus {
 /// for possible result values.
 extern XRayPatchingStatus __xray_patch();
 
+extern XRayPatchingStatus __xray_patch_object(int32_t ObjId);
+
 /// Reverses the effect of __xray_patch(). See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_unpatch();
 
+extern XRayPatchingStatus __xray_unpatch_object(int32_t ObjId);
+
 /// This patches a specific function id. See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_patch_function(int32_t FuncId);
 
+extern XRayPatchingStatus __xray_patch_function_in_object(int32_t FuncId,
+                                                          int32_t ObjId);
+
 /// This unpatches a specific function id. See XRayPatchingStatus for possible
 /// result values.
 extern XRayPatchingStatus __xray_unpatch_function(int32_t FuncId);
 
+extern XRayPatchingStatus __xray_unpatch_function_in_object(int32_t FuncId,
+                                                            int32_t ObjId);
+
 /// This function returns the address of the function provided a valid function
 /// id. We return 0 if we encounter any error, even if 0 may be a valid function
 /// address.
 extern uintptr_t __xray_function_address(int32_t FuncId);
 
+extern uintptr_t __xray_function_address_in_object(int32_t FuncId,
+                                                   int32_t ObjId);
+
 /// This function returns the maximum valid function id. Returns 0 if we
 /// encounter errors (when there are no instrumented functions, etc.).
 extern size_t __xray_max_function_id();
 
+extern size_t __xray_max_function_id_in_object(int32_t ObjId);
+
+extern size_t __xray_num_objects();
+
+extern int32_t __xray_unpack_function_id(int32_t PackedId);
+
+extern int32_t __xray_unpack_object_id(int32_t PackedId);
+
+extern int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId);
+
 /// Initialize the required XRay data structures. This is useful in cases where
 /// users want to control precisely when the XRay instrumentation data
 /// structures are initialized, for example when the XRay library is built with
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index cf7b5062aae32..165e61b6f4576 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -10,6 +10,10 @@ set(XRAY_SOURCES
   xray_utils.cpp
   )
 
+set(XRAY_DSO_SOURCES
+  xray_dso_init.cpp
+  )
+
 # Implementation files for all XRay modes.
 set(XRAY_FDR_MODE_SOURCES
   xray_fdr_flags.cpp
@@ -33,6 +37,11 @@ set(x86_64_SOURCES
   xray_trampoline_x86_64.S
   )
 
+set(x86_64_DSO_SOURCES
+   xray_trampoline_x86_64.S
+   )
+
+
 set(arm_SOURCES
   xray_arm.cpp
   xray_trampoline_arm.S
@@ -128,10 +137,12 @@ set(XRAY_IMPL_HEADERS
 # consumption by tests.
 set(XRAY_ALL_SOURCE_FILES
   ${XRAY_SOURCES}
+  ${XRAY_DSO_SOURCES}
   ${XRAY_FDR_MODE_SOURCES}
   ${XRAY_BASIC_MODE_SOURCES}
   ${XRAY_PROFILING_MODE_SOURCES}
   ${x86_64_SOURCES}
+  ${x86_64_DSO_SOURCES}
   ${arm_SOURCES}
   ${armhf_SOURCES}
   ${hexagon_SOURCES}
@@ -162,6 +173,9 @@ set(XRAY_CFLAGS
   ${COMPILER_RT_CXX_CFLAGS})
 set(XRAY_COMMON_DEFINITIONS SANITIZER_COMMON_NO_REDEFINE_BUILTINS XRAY_HAS_EXCEPTIONS=1)
 
+# DSO trampolines need to be compiled with GOT addressing
+set(XRAY_COMMON_DEFINITIONS_DSO ${XRAY_COMMON_DEFINITIONS} XRAY_PIC)
+
 # Too many existing bugs, needs cleanup.
 append_list_if(COMPILER_RT_HAS_WNO_FORMAT -Wno-format XRAY_CFLAGS)
 
@@ -201,7 +215,16 @@ if (APPLE)
     CFLAGS ${XRAY_CFLAGS}
     DEFS ${XRAY_COMMON_DEFINITIONS}
     DEPS ${XRAY_DEPS})
+  add_compiler_rt_object_libraries(RTXrayDSO
+    OS ${XRAY_SUPPORTED_OS}
+    ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+    SOURCES ${XRAY_DSO_SOURCES}
+    ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+    CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+    DEPS ${XRAY_DEPS})
   set(XRAY_RTXRAY_ARCH_LIBS "")
+  set(XRAY_DSO_RTXRAY_ARCH_LIBS "")
   foreach(arch ${XRAY_SUPPORTED_ARCH})
     if(NOT ${arch} IN_LIST XRAY_SOURCE_ARCHS)
       continue()
@@ -215,6 +238,17 @@ if (APPLE)
       DEFS ${XRAY_COMMON_DEFINITIONS}
       DEPS ${XRAY_DEPS})
     list(APPEND XRAY_RTXRAY_ARCH_LIBS RTXray_${arch})
+    if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
+      add_compiler_rt_object_libraries(RTXrayDSO_${arch}
+        OS ${XRAY_SUPPORTED_OS}
+        ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+        SOURCES ${${arch}_DSO_SOURCES}
+        ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+        CFLAGS ${XRAY_CFLAGS}
+        DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+        DEPS ${XRAY_DEPS})
+      list(APPEND XRAY_DSO_RTXRAY_ARCH_LIBS RTXrayDSO_${arch})
+    endif()
   endforeach()
   add_compiler_rt_object_libraries(RTXrayFDR
     OS ${XRAY_SUPPORTED_OS}
@@ -252,6 +286,17 @@ if (APPLE)
     LINK_FLAGS ${XRAY_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
     LINK_LIBS ${XRAY_LINK_LIBS}
     PARENT_TARGET xray)
+  add_compiler_rt_runtime(clang_rt.xray-dso
+    STATIC
+    OS ${XRAY_SUPPORTED_OS}
+    ARCHS ${XRAY_DSO_SUPPORTED_ARCH}
+    OBJECT_LIBS RTXrayDSO ${XRAY_DSO_RTXRAY_ARCH_LIBS}
+    CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS}
+    LINK_FLAGS ${XRAY_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+    LINK_LIBS ${XRAY_LINK_LIBS}
+    PARENT_TARGET xray)
+
   add_compiler_rt_runtime(clang_rt.xray-fdr
     STATIC
     OS ${XRAY_SUPPORTED_OS}
@@ -283,6 +328,7 @@ if (APPLE)
     LINK_LIBS ${XRAY_LINK_LIBS}
     PARENT_TARGET xray)
 else() # not Apple
+  message("DSO Supported Archs: ${XRAY_DSO_SUPPORTED_ARCH}")
   foreach(arch ${XRAY_SUPPORTED_ARCH})
     if(NOT CAN_TARGET_${arch})
       continue()
@@ -325,7 +371,7 @@ else() # not Apple
      LINK_LIBS ${XRAY_LINK_LIBS}
      DEFS ${XRAY_COMMON_DEFINITIONS}
      OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS} RTXray
-     PARENT_TARGET xray)
+     PARENT_TARGET xray)  
     # FDR mode runtime archive (addon for clang_rt.xray)
     add_compiler_rt_runtime(clang_rt.xray-fdr
       STATIC
@@ -346,8 +392,8 @@ else() # not Apple
       DEFS ${XRAY_COMMON_DEFINITIONS}
       OBJECT_LIBS RTXrayBASIC
       PARENT_TARGET xray)
-   # Profiler Mode runtime
-   add_compiler_rt_runtime(clang_rt.xray-profiling
+    # Profiler Mode runtime
+    add_compiler_rt_runtime(clang_rt.xray-profiling
      STATIC
      ARCHS ${arch}
      CFLAGS ${XRAY_CFLAGS}
@@ -356,6 +402,27 @@ else() # not Apple
      DEFS ${XRAY_COMMON_DEFINITIONS}
      OBJECT_LIBS RTXrayPROFILING
      PARENT_TARGET xray)
+
+    if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
+      # TODO: Only implemented for X86 at the moment
+      add_compiler_rt_object_libraries(RTXrayDSO
+        ARCHS ${arch}
+        SOURCES ${XRAY_DSO_SOURCES} ${${arch}_DSO_SOURCES} 
+        ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS}
+        CFLAGS ${XRAY_CFLAGS}
+        DEFS ${XRAY_COMMON_DEFINITIONS_DSO}
+        DEPS ${XRAY_DEPS})
+      # DSO runtime archive
+      add_compiler_rt_runtime(clang_rt.xray-dso
+        STATIC
+        ARCHS ${arch}
+        CFLAGS ${XRAY_CFLAGS}
+        LINK_FLAGS ${XRAY_LINK_FLAGS}
+        LINK_LIBS ${XRAY_LINK_LIBS}
+        DEFS ${XRAY_COMMON_DEFINITIONS}
+        OBJECT_LIBS RTXrayDSO
+        PARENT_TARGET xray)
+    endif()
   endforeach()
 endif() # not Apple
 
diff --git a/compiler-rt/lib/xray/xray_dso_init.cpp b/compiler-rt/lib/xray/xray_dso_init.cpp
new file mode 100644
index 0000000000000..ad5f91f3f2448
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_dso_init.cpp
@@ -0,0 +1,62 @@
+//===-- xray_init.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic for DSOs.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+using namespace __sanitizer;
+
+extern "C" {
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak))
+__attribute__((visibility("hidden")));
+
+#if SANITIZER_MAC
+// HACK: This is a temporary workaround to make XRay build on
+// Darwin, but it will probably not work at runtime.
+extern const XRaySledEntry __start_xray_instr_map[] = {};
+extern const XRaySledEntry __stop_xray_instr_map[] = {};
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {};
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {};
+#endif
+}
+
+// Handler functions to call in the patched entry/exit sled.
+extern atomic_uintptr_t XRayPatchedFunction;
+extern atomic_uintptr_t XRayArgLogger;
+extern atomic_uintptr_t XRayPatchedCustomEvent;
+extern atomic_uintptr_t XRayPatchedTypedEvent;
+
+static int __xray_object_id{-1};
+
+// Note: .preinit_array initialization does not work for DSOs
+__attribute__((constructor(0))) static void
+__xray_init_dso() XRAY_NEVER_INSTRUMENT {
+  // Register sleds in main XRay runtime.
+  __xray_object_id =
+      __xray_register_dso(__start_xray_instr_map, __stop_xray_instr_map,
+                          __start_xray_fn_idx, __stop_xray_fn_idx, {});
+}
+
+__attribute__((destructor(0))) static void
+__xray_finalize_dso() XRAY_NEVER_INSTRUMENT {
+  // Inform the main runtime that this DSO is no longer used.
+  __xray_deregister_dso(__xray_object_id);
+}
diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index f22a31b95686d..c3570d97701ee 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -16,6 +16,8 @@
 #include <unistd.h>
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray_allocator.h"
 #include "xray_defs.h"
 #include "xray_flags.h"
 #include "xray_interface_internal.h"
@@ -28,7 +30,7 @@ extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
 extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
 
 #if SANITIZER_APPLE
-// HACK: This is a temporary workaround to make XRay build on 
+// HACK: This is a temporary workaround to make XRay build on
 // Darwin, but it will probably not work at runtime.
 const XRaySledEntry __start_xray_instr_map[] = {};
 extern const XRaySledEntry __stop_xray_instr_map[] = {};
@@ -50,7 +52,11 @@ atomic_uint8_t XRayInitialized{0};
 
 // This should always be updated before XRayInitialized is updated.
 SpinMutex XRayInstrMapMutex;
-XRaySledMap XRayInstrMap;
+// XRaySledMap XRayInstrMap;
+//  Contains maps for the main executable as well as DSOs.
+// std::vector<XRaySledMap> XRayInstrMaps;
+XRaySledMap *XRayInstrMaps;
+atomic_uint32_t XRayNumObjects;
 
 // Global flag to determine whether the flags have been initialized.
 atomic_uint8_t XRayFlagsInitialized{0};
@@ -58,6 +64,60 @@ atomic_uint8_t XRayFlagsInitialized{0};
 // A mutex to allow only one thread to initialize the XRay data structures.
 SpinMutex XRayInitMutex;
 
+int32_t
+__xray_register_sleds(const XRaySledEntry *SledsBegin,
+                      const XRaySledEntry *SledsEnd,
+                      const XRayFunctionSledIndex *FnIndexBegin,
+                      const XRayFunctionSledIndex *FnIndexEnd, bool FromDSO,
+                      XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
+  if (!SledsBegin || !SledsEnd) {
+    return -1;
+  }
+  XRaySledMap SledMap;
+  SledMap.FromDSO = FromDSO;
+  SledMap.Loaded = true;
+  SledMap.Trampolines = Trampolines;
+  SledMap.Sleds = SledsBegin;
+  SledMap.Entries = SledsEnd - SledsBegin;
+  if (FnIndexBegin != nullptr) {
+    SledMap.SledsIndex = FnIndexBegin;
+    SledMap.Functions = FnIndexEnd - FnIndexBegin;
+  } else {
+    size_t CountFunctions = 0;
+    uint64_t LastFnAddr = 0;
+
+    for (std::size_t I = 0; I < SledMap.Entries; I++) {
+      const auto &Sled = SledMap.Sleds[I];
+      const auto Function = Sled.function();
+      if (Function != LastFnAddr) {
+        CountFunctions++;
+        LastFnAddr = Function;
+      }
+    }
+
+    SledMap.Functions = CountFunctions;
+  }
+  if (SledMap.Functions >= XRayMaxFunctions) {
+    Report("Too many functions! Maximum is %ld\n", XRayMaxFunctions);
+    return -1;
+  }
+
+  if (Verbosity()) {
+    Report("Registering %d new functions!\n", SledMap.Functions);
+  }
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    auto Idx = atomic_fetch_add(&XRayNumObjects, 1, memory_order_acq_rel);
+    if (Idx >= XRayMaxObjects) {
+      Report("Too many objects registered! Maximum is %ld\n", XRayMaxObjects);
+      return -1;
+    }
+    XRayInstrMaps[Idx] = std::move(SledMap);
+    return Idx;
+  }
+}
+
 // __xray_init() will do the actual loading of the current process' memory map
 // and then proceed to look for the .xray_instr_map section/segment.
 void __xray_init() XRAY_NEVER_INSTRUMENT {
@@ -80,29 +140,14 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
     return;
   }
 
-  {
-    SpinMutexLock Guard(&XRayInstrMapMutex);
-    XRayInstrMap.Sleds = __start_xray_instr_map;
-    XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
-    if (__start_xray_fn_idx != nullptr) {
-      XRayInstrMap.SledsIndex = __start_xray_fn_idx;
-      XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx;
-    } else {
-      size_t CountFunctions = 0;
-      uint64_t LastFnAddr = 0;
-
-      for (std::size_t I = 0; I < XRayInstrMap.Entries; I++) {
-        const auto &Sled = XRayInstrMap.Sleds[I];
-        const auto Function = Sled.function();
-        if (Function != LastFnAddr) {
-          CountFunctions++;
-          LastFnAddr = Function;
-        }
-      }
+  atomic_store(&XRayNumObjects, 0, memory_order_release);
+
+  // Pre-allocation takes up approx. 5kB for XRayMaxObjects=64.
+  XRayInstrMaps = allocateBuffer<XRaySledMap>(XRayMaxObjects);
+
+  __xray_register_sleds(__start_xray_instr_map, __stop_xray_instr_map,
+                        __start_xray_fn_idx, __stop_xray_fn_idx, false, {});
 
-      XRayInstrMap.Functions = CountFunctions;
-    }
-  }
   atomic_store(&XRayInitialized, true, memory_order_release);
 
 #ifndef XRAY_NO_PREINIT
@@ -111,6 +156,71 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
 #endif
 }
 
+// Default visibility is hidden, so we have to explicitly make it visible to
+// DSO.
+SANITIZER_INTERFACE_ATTRIBUTE int32_t __xray_register_dso(
+    const XRaySledEntry *SledsBegin, const XRaySledEntry *SledsEnd,
+    const XRayFunctionSledIndex *FnIndexBegin,
+    const XRayFunctionSledIndex *FnIndexEnd,
+    XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
+  // Make sure XRay has been initialized in the main executable.
+  __xray_init();
+
+  if (__xray_num_objects() == 0) {
+    if (Verbosity())
+      Report("No XRay instrumentation map in main executable. Not initializing "
+             "XRay for DSO.\n");
+    return -1;
+  }
+
+  // Register sleds in global map.
+  int ObjId = __xray_register_sleds(SledsBegin, SledsEnd, FnIndexBegin,
+                                     FnIndexEnd, true, Trampolines);
+
+#ifndef XRAY_NO_PREINIT
+  if (ObjId >= 0 && flags()->patch_premain)
+    __xray_patch_object(ObjId);
+#endif
+
+  return ObjId;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE bool
+__xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  // Make sure XRay has been initialized in the main executable.
+  __xray_init();
+
+  if (ObjId <= 0 || ObjId >= __xray_num_objects()) {
+    if (Verbosity())
+      Report("Can't deregister object with ID %d: ID is invalid.\n", ObjId);
+    return false;
+  }
+
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    auto &Entry = XRayInstrMaps[ObjId];
+    if (!Entry.FromDSO) {
+      if (Verbosity())
+        Report("Can't deregister object with ID %d: object does not correspond "
+               "to a shared library.\n",
+               ObjId);
+      return false;
+    }
+    if (!Entry.Loaded) {
+      if (Verbosity())
+        Report("Can't deregister object with ID %d: object is not loaded.\n",
+               ObjId);
+    }
+    // This is all we have to do here.
+    Entry.Loaded = false;
+  }
+
+  if (Verbosity())
+    Report("Deregistered object with ID %d.\n", ObjId);
+
+  return true;
+}
+
 // FIXME: Make check-xray tests work on FreeBSD without
 // SANITIZER_CAN_USE_PREINIT_ARRAY.
 // See sanitizer_internal_defs.h where the macro is defined.
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 5839043fcb93a..dcd75c83b0b00 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -36,7 +36,8 @@
 
 extern __sanitizer::SpinMutex XRayInstrMapMutex;
 extern __sanitizer::atomic_uint8_t XRayInitialized;
-extern __xray::XRaySledMap XRayInstrMap;
+extern __xray::XRaySledMap *XRayInstrMaps;
+extern __sanitizer::atomic_uint32_t XRayNumObjects;
 
 namespace __xray {
 
@@ -61,16 +62,16 @@ static const int16_t cSledLength = 20;
 #endif /* CPU architecture */
 
 // This is the function to call when we encounter the entry or exit sleds.
-atomic_uintptr_t XRayPatchedFunction{0};
+atomic_uintptr_t XRayPatchedFunction SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call from the arg1-enabled sleds/trampolines.
-atomic_uintptr_t XRayArgLogger{0};
+atomic_uintptr_t XRayArgLogger SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call when we encounter a custom event log call.
-atomic_uintptr_t XRayPatchedCustomEvent{0};
+atomic_uintptr_t XRayPatchedCustomEvent SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the function to call when we encounter a typed event log call.
-atomic_uintptr_t XRayPatchedTypedEvent{0};
+atomic_uintptr_t XRayPatchedTypedEvent SANITIZER_INTERFACE_ATTRIBUTE{0};
 
 // This is the global status to determine whether we are currently
 // patching/unpatching.
@@ -150,27 +151,42 @@ class MProtectHelper {
 
 namespace {
 
-bool patchSled(const XRaySledEntry &Sled, bool Enable,
-               int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+bool isObjectLoaded(int32_t ObjId) {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (ObjId < 0 ||
+      ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+    return false;
+  }
+  return XRayInstrMaps[ObjId].Loaded;
+}
+
+bool patchSled(const XRaySledEntry &Sled, bool Enable, int32_t FuncId,
+               const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
   bool Success = false;
   switch (Sled.Kind) {
   case XRayEntryType::ENTRY:
-    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+    Success =
+        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.EntryTrampoline);
     break;
   case XRayEntryType::EXIT:
-    Success = patchFunctionExit(Enable, FuncId, Sled);
+    Success =
+        patchFunctionExit(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
     break;
   case XRayEntryType::TAIL:
-    Success = patchFunctionTailExit(Enable, FuncId, Sled);
+    Success = patchFunctionTailExit(Enable, FuncId, Sled,
+                                    Trampolines.TailExitTrampoline);
     break;
   case XRayEntryType::LOG_ARGS_ENTRY:
-    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+    Success =
+        patchFunctionEntry(Enable, FuncId, Sled, Trampolines.LogArgsTrampoline);
     break;
   case XRayEntryType::CUSTOM_EVENT:
-    Success = patchCustomEvent(Enable, FuncId, Sled);
+    Success = patchCustomEvent(Enable, FuncId, Sled,
+                               Trampolines.CustomEventTrampoline);
     break;
   case XRayEntryType::TYPED_EVENT:
-    Success = patchTypedEvent(Enable, FuncId, Sled);
+    Success =
+        patchTypedEvent(Enable, FuncId, Sled, Trampolines.TypedEventTrampoline);
     break;
   default:
     Report("Unsupported sled kind '%" PRIu64 "' @%04x\n", Sled.Address,
@@ -205,10 +221,9 @@ findFunctionSleds(int32_t FuncId,
   return Index;
 }
 
-XRayPatchingStatus patchFunction(int32_t FuncId,
+XRayPatchingStatus patchFunction(int32_t FuncId, int32_t ObjId,
                                  bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
 
   uint8_t NotPatching = false;
@@ -220,13 +235,24 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch function: invalid sled map index: %d", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
 
   // If we don't have an index, we can't patch individual functions.
   if (InstrMap.Functions == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Invalid function id provided: %d\n", FuncId);
+    return XRayPatchingStatus::NOT_INITIALIZED;
+  }
+
   // FuncId must be a positive number, less than the number of functions
   // instrumented.
   if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
@@ -234,6 +260,8 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
     return XRayPatchingStatus::FAILED;
   }
 
+  auto PackedId = __xray::MakePackedId(FuncId, ObjId);
+
   // Now we patch ths sleds for this specific function.
   XRayFunctionSledIndex SledRange;
   if (InstrMap.SledsIndex) {
@@ -242,13 +270,13 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
   } else {
     SledRange = findFunctionSleds(FuncId, InstrMap);
   }
+
   auto *f = SledRange.Begin;
   bool SucceedOnce = false;
   for (size_t i = 0; i != SledRange.Size; ++i)
-    SucceedOnce |= patchSled(f[i], Enable, FuncId);
+    SucceedOnce |= patchSled(f[i], Enable, PackedId, InstrMap.Trampolines);
 
-  atomic_store(&XRayPatching, false,
-                            memory_order_release);
+  atomic_store(&XRayPatching, false, memory_order_release);
 
   if (!SucceedOnce) {
     Report("Failed patching any sled for function '%d'.", FuncId);
@@ -261,9 +289,10 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
 // controlPatching implements the common internals of the patching/unpatching
 // implementation. |Enable| defines whether we're enabling or disabling the
 // runtime XRay instrumentation.
-XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+XRayPatchingStatus controlPatching(bool Enable,
+                                   int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
 
   uint8_t NotPatching = false;
@@ -275,18 +304,31 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
   auto XRayPatchingStatusResetter =
       at_scope_exit([&PatchingSuccess] {
         if (!PatchingSuccess)
-          atomic_store(&XRayPatching, false,
-                                    memory_order_release);
+          atomic_store(&XRayPatching, false, memory_order_release);
       });
 
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch functions: invalid sled map index: %d\n", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
   if (InstrMap.Entries == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
+  if (Verbosity())
+    Report("Patching object %d with %d functions.\n", ObjId, InstrMap.Entries);
+
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Object is not loaded at index: %d\n", ObjId);
+    return XRayPatchingStatus::FAILED;
+  }
+
   uint32_t FuncId = 1;
   uint64_t CurFun = 0;
 
@@ -336,20 +378,31 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
       ++FuncId;
       CurFun = F;
     }
-    patchSled(Sled, Enable, FuncId);
+    auto PackedId = __xray::MakePackedId(FuncId, ObjId);
+    patchSled(Sled, Enable, PackedId, InstrMap.Trampolines);
   }
-  atomic_store(&XRayPatching, false,
-                            memory_order_release);
+  atomic_store(&XRayPatching, false, memory_order_release);
   PatchingSuccess = true;
   return XRayPatchingStatus::SUCCESS;
 }
 
-XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
+XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, int32_t ObjId,
                                             bool Enable) XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    if (ObjId < 0 ||
+        ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire)) {
+      Report("Unable to patch function: invalid sled map index: %d\n", ObjId);
+      return XRayPatchingStatus::FAILED;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
+  }
+
+  // Check if the corresponding DSO has been unloaded.
+  if (!InstrMap.Loaded) {
+    Report("Object is not loaded at index: %d\n", ObjId);
+    return XRayPatchingStatus::FAILED;
   }
 
   // FuncId must be a positive number, less than the number of functions
@@ -367,7 +420,7 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
     return XRayPatchingStatus::FAILED;
   }
 
-  // Here we compute the minimum sled and maximum sled associated with a
+  // Here we compute the minumum sled and maximum sled associated with a
   // particular function ID.
   XRayFunctionSledIndex SledRange;
   if (InstrMap.SledsIndex) {
@@ -398,7 +451,7 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
     Report("Failed mprotect: %d\n", errno);
     return XRayPatchingStatus::FAILED;
   }
-  return patchFunction(FuncId, Enable);
+  return patchFunction(FuncId, ObjId, Enable);
 }
 
 } // namespace
@@ -412,12 +465,10 @@ using namespace __xray;
 
 int __xray_set_handler(void (*entry)(int32_t,
                                      XRayEntryType)) XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
 
     atomic_store(&__xray::XRayPatchedFunction,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -425,11 +476,9 @@ int __xray_set_handler(void (*entry)(int32_t,
 
 int __xray_set_customevent_handler(void (*entry)(void *, size_t))
     XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
     atomic_store(&__xray::XRayPatchedCustomEvent,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -437,11 +486,9 @@ int __xray_set_customevent_handler(void (*entry)(void *, size_t))
 
 int __xray_set_typedevent_handler(void (*entry)(size_t, const void *,
                                                 size_t)) XRAY_NEVER_INSTRUMENT {
-  if (atomic_load(&XRayInitialized,
-                               memory_order_acquire)) {
+  if (atomic_load(&XRayInitialized, memory_order_acquire)) {
     atomic_store(&__xray::XRayPatchedTypedEvent,
-                              reinterpret_cast<uintptr_t>(entry),
-                              memory_order_release);
+                 reinterpret_cast<uintptr_t>(entry), memory_order_release);
     return 1;
   }
   return 0;
@@ -471,42 +518,123 @@ uint16_t __xray_register_event_type(
 }
 
 XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
-  return controlPatching(true);
+  XRayPatchingStatus CombinedStatus{SUCCESS};
+  for (size_t I = 0; I < __xray_num_objects(); ++I) {
+    if (!isObjectLoaded(I))
+      continue;
+    auto LastStatus = controlPatching(true, I);
+    switch (LastStatus) {
+    case FAILED:
+      CombinedStatus = FAILED;
+      break;
+    case NOT_INITIALIZED:
+      if (CombinedStatus != FAILED)
+        CombinedStatus = NOT_INITIALIZED;
+      break;
+    case ONGOING:
+      if (CombinedStatus != FAILED && CombinedStatus != NOT_INITIALIZED)
+        CombinedStatus = ONGOING;
+      break;
+    default:
+      break;
+    }
+  }
+  return CombinedStatus;
+}
+
+XRayPatchingStatus __xray_patch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return controlPatching(true, ObjId);
 }
 
 XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
-  return controlPatching(false);
+  XRayPatchingStatus CombinedStatus{SUCCESS};
+  for (size_t I = 0; I < __xray_num_objects(); ++I) {
+    if (!isObjectLoaded(I))
+      continue;
+    auto LastStatus = controlPatching(false, I);
+    switch (LastStatus) {
+    case FAILED:
+      CombinedStatus = FAILED;
+      break;
+    case NOT_INITIALIZED:
+      if (CombinedStatus != FAILED)
+        CombinedStatus = NOT_INITIALIZED;
+      break;
+    case ONGOING:
+      if (CombinedStatus != FAILED && CombinedStatus != NOT_INITIALIZED)
+        CombinedStatus = ONGOING;
+      break;
+    default:
+      break;
+    }
+  }
+  return CombinedStatus;
+}
+
+XRayPatchingStatus __xray_unpatch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return controlPatching(false, ObjId);
 }
 
 XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
-  return mprotectAndPatchFunction(FuncId, true);
+  auto Ids = __xray::UnpackId(FuncId);
+  auto ObjId = Ids.first;
+  auto FnId = Ids.second;
+  return mprotectAndPatchFunction(FnId, ObjId, true);
+}
+
+XRayPatchingStatus
+__xray_patch_function_in_object(int32_t FuncId,
+                                int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return mprotectAndPatchFunction(FuncId, ObjId, true);
 }
 
 XRayPatchingStatus
 __xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
-  return mprotectAndPatchFunction(FuncId, false);
+  auto Ids = __xray::UnpackId(FuncId);
+  auto ObjId = Ids.first;
+  auto FnId = Ids.second;
+  return mprotectAndPatchFunction(FnId, ObjId, false);
+}
+
+XRayPatchingStatus
+__xray_unpatch_function_in_object(int32_t FuncId,
+                                  int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  return mprotectAndPatchFunction(FuncId, ObjId, false);
 }
 
 int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) {
-  if (!atomic_load(&XRayInitialized,
-                                memory_order_acquire))
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
     return 0;
 
   // A relaxed write might not be visible even if the current thread gets
   // scheduled on a different CPU/NUMA node.  We need to wait for everyone to
   // have this handler installed for consistency of collected data across CPUs.
   atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry),
-                            memory_order_release);
+               memory_order_release);
   return 1;
 }
 
 int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
 
-uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+uintptr_t
+__xray_function_address(int32_t CombinedFuncId) XRAY_NEVER_INSTRUMENT {
+  auto Ids = __xray::UnpackId(CombinedFuncId);
+  return __xray_function_address_in_object(Ids.second, Ids.first);
+}
+
+uintptr_t __xray_function_address_in_object(int32_t FuncId, int32_t ObjId)
+    XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
-    InstrMap = XRayInstrMap;
+    auto count = atomic_load(&XRayNumObjects, memory_order_acquire);
+    if (ObjId < 0 || ObjId >= count) {
+      Report("Unable to determine function address: invalid sled map index %d "
+             "(size is %d)\n",
+             ObjId, (int)count);
+      return 0;
+    }
+    InstrMap = XRayInstrMaps[ObjId];
   }
 
   if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions)
@@ -525,6 +653,29 @@ uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
 }
 
 size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
+  return __xray_max_function_id_in_object(0);
+}
+
+size_t __xray_max_function_id_in_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (ObjId < 0 || ObjId > atomic_load(&XRayNumObjects, memory_order_acquire))
+    return 0;
+  return XRayInstrMaps[ObjId].Functions;
+}
+
+size_t __xray_num_objects() XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Guard(&XRayInstrMapMutex);
-  return XRayInstrMap.Functions;
+  return atomic_load(&XRayNumObjects, memory_order_acquire);
+}
+
+int32_t __xray_unpack_function_id(int32_t PackedId) {
+  return __xray::UnpackId(PackedId).second;
+}
+
+int32_t __xray_unpack_object_id(int32_t PackedId) {
+  return __xray::UnpackId(PackedId).first;
+}
+
+int32_t __xray_pack_id(int32_t FuncId, int32_t ObjId) {
+  return __xray::MakePackedId(FuncId, ObjId);
 }
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index 80c07c167f646..ec5752a9875e3 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -18,6 +18,18 @@
 #include "xray/xray_interface.h"
 #include <cstddef>
 #include <cstdint>
+#include <utility>
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+extern void __xray_FunctionTailExit();
+extern void __xray_ArgLoggerEntry();
+extern void __xray_CustomEvent();
+extern void __xray_TypedEvent();
+}
 
 extern "C" {
 
@@ -67,36 +79,77 @@ struct XRayFunctionSledIndex {
                                                    uintptr_t(Begin));
   }
 };
+
+struct XRayTrampolines {
+  void (*EntryTrampoline)();
+  void (*ExitTrampoline)();
+  void (*TailExitTrampoline)();
+  void (*LogArgsTrampoline)();
+  void (*CustomEventTrampoline)();
+  void (*TypedEventTrampoline)();
+
+  XRayTrampolines() {
+    // These resolve to the definitions in the respective executable or DSO.
+    EntryTrampoline = __xray_FunctionEntry;
+    ExitTrampoline = __xray_FunctionExit;
+    TailExitTrampoline = __xray_FunctionTailExit;
+    LogArgsTrampoline = __xray_ArgLoggerEntry;
+    CustomEventTrampoline = __xray_CustomEvent;
+    TypedEventTrampoline = __xray_TypedEvent;
+  }
+};
+
+extern int32_t __xray_register_dso(const XRaySledEntry *SledsBegin,
+                                   const XRaySledEntry *SledsEnd,
+                                   const XRayFunctionSledIndex *FnIndexBegin,
+                                   const XRayFunctionSledIndex *FnIndexEnd,
+                                   XRayTrampolines Trampolines);
+
+extern bool __xray_deregister_dso(int32_t ObjId);
 }
 
 namespace __xray {
 
+constexpr uint32_t XRayNFnBits = 24;
+constexpr uint32_t XRayNObjBits = 8;
+
+constexpr uint32_t XRayFnBitMask = 0x00FFFFFF;
+constexpr uint32_t XRayObjBitMask = 0xFF000000;
+
+constexpr size_t XRayMaxFunctions = 1 << XRayNFnBits;
+constexpr size_t XRayMaxObjects = 1 << XRayNObjBits;
+
+inline int32_t MakePackedId(int32_t FnId, int32_t ObjId) {
+  return ((ObjId << XRayNFnBits) & XRayObjBitMask) | (FnId & XRayFnBitMask);
+}
+
+inline std::pair<int32_t, int32_t> UnpackId(int32_t CombinedId) {
+  uint32_t ObjId = (CombinedId & XRayObjBitMask) >> XRayNFnBits;
+  uint32_t FnId = CombinedId & XRayFnBitMask;
+  return {ObjId, FnId};
+}
+
 struct XRaySledMap {
   const XRaySledEntry *Sleds;
   size_t Entries;
   const XRayFunctionSledIndex *SledsIndex;
   size_t Functions;
+  XRayTrampolines Trampolines;
+  bool FromDSO;
+  bool Loaded;
 };
 
 bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
                         void (*Trampoline)());
-bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                       void (*Trampoline)());
 bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
-                           const XRaySledEntry &Sled);
-bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
-bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+                           const XRaySledEntry &Sled, void (*Trampoline)());
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                      void (*Trampoline)());
+bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled,
+                     void (*Trampoline)());
 
 } // namespace __xray
 
-extern "C" {
-// The following functions have to be defined in assembler, on a per-platform
-// basis. See xray_trampoline_*.S files for implementations.
-extern void __xray_FunctionEntry();
-extern void __xray_FunctionExit();
-extern void __xray_FunctionTailExit();
-extern void __xray_ArgLoggerEntry();
-extern void __xray_CustomEvent();
-extern void __xray_TypedEvent();
-}
-
 #endif
diff --git a/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
index ff3ac91071a60..31c0deb34e0cb 100644
--- a/compiler-rt/lib/xray/xray_trampoline_x86_64.S
+++ b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -108,6 +108,16 @@
 	.section __TEXT,__text
 #endif
 
+.macro LOAD_HANDLER_ADDR handler
+#if !defined(XRAY_PIC)
+	movq	ASM_SYMBOL(\handler)(%rip), %rax
+#else
+	movq	ASM_SYMBOL(\handler)@GOTPCREL(%rip), %rax
+	movq	(%rax), %rax
+#endif
+.endm
+
+
 //===----------------------------------------------------------------------===//
 
 	.globl ASM_SYMBOL(__xray_FunctionEntry)
@@ -122,7 +132,7 @@ ASM_SYMBOL(__xray_FunctionEntry):
 
 	// This load has to be atomic, it's concurrent with __xray_patch().
 	// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq	%rax, %rax
 	je	LOCAL_LABEL(tmp0)
 
@@ -160,7 +170,7 @@ ASM_SYMBOL(__xray_FunctionExit):
 	movupd	%xmm1, 16(%rsp)
 	movq	%rax, 8(%rsp)
 	movq	%rdx, 0(%rsp)
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq %rax,%rax
 	je	LOCAL_LABEL(tmp2)
 
@@ -196,7 +206,7 @@ ASM_SYMBOL(__xray_FunctionTailExit):
 	ALIGN_STACK_16B
 	SAVE_REGISTERS
 
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq %rax,%rax
 	je	LOCAL_LABEL(tmp4)
 
@@ -225,12 +235,12 @@ ASM_SYMBOL(__xray_ArgLoggerEntry):
 	SAVE_REGISTERS
 
 	// Again, these function pointer loads must be atomic; MOV is fine.
-	movq	ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray13XRayArgLoggerE
 	testq	%rax, %rax
 	jne	LOCAL_LABEL(arg1entryLog)
 
 	// If [arg1 logging handler] not set, defer to no-arg logging.
-	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray19XRayPatchedFunctionE
 	testq	%rax, %rax
 	je	LOCAL_LABEL(arg1entryFail)
 
@@ -269,7 +279,7 @@ ASM_SYMBOL(__xray_CustomEvent):
 
 	// We take two arguments to this trampoline, which should be in rdi	and rsi
 	// already.
-	movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray22XRayPatchedCustomEventE
 	testq %rax,%rax
 	je LOCAL_LABEL(customEventCleanup)
 
@@ -294,7 +304,7 @@ ASM_SYMBOL(__xray_TypedEvent):
 
 	// We pass three arguments to this trampoline, which should be in rdi, rsi
 	// and rdx without our intervention.
-	movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax
+	LOAD_HANDLER_ADDR _ZN6__xray21XRayPatchedTypedEventE
 	testq %rax,%rax
 	je LOCAL_LABEL(typedEventCleanup)
 
diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp
index b9666a40861d4..663a51b268661 100644
--- a/compiler-rt/lib/xray/xray_x86_64.cpp
+++ b/compiler-rt/lib/xray/xray_x86_64.cpp
@@ -170,7 +170,8 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                       const XRaySledEntry &Sled,
+                       void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -192,11 +193,11 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
   // Prerequisite is to compute the relative offset fo the
   // __xray_FunctionExit function's address.
   const uint64_t Address = Sled.address();
-  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                              (static_cast<int64_t>(Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
-           reinterpret_cast<void *>(__xray_FunctionExit),
+           reinterpret_cast<void *>(Trampoline),
            reinterpret_cast<void *>(Address));
     return false;
   }
@@ -217,16 +218,16 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
-                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                           const XRaySledEntry &Sled,
+                           void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the tail call sled with a similar
   // sequence as the entry sled, but calls the tail exit sled instead.
   const uint64_t Address = Sled.address();
-  int64_t TrampolineOffset =
-      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
-      (static_cast<int64_t>(Address) + 11);
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
+                             (static_cast<int64_t>(Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
-           reinterpret_cast<void *>(__xray_FunctionTailExit),
+           reinterpret_cast<void *>(Trampoline),
            reinterpret_cast<void *>(Address));
     return false;
   }
@@ -247,7 +248,8 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                      const XRaySledEntry &Sled,
+                      void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -275,7 +277,8 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
 }
 
 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
-                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+                     const XRaySledEntry &Sled,
+                     void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
diff --git a/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
new file mode 100644
index 0000000000000..49544cadcae87
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp
@@ -0,0 +1,47 @@
+// Testing shared library support in basic logging mode.
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=false,xray_mode=xray-basic,xray_logfile_base=basic-mode-dso-,verbosity=1" XRAY_BASIC_OPTIONS="func_duration_threshold_us=0" %run %t/main.o 2>&1 | FileCheck %s
+// RUN: %llvm_xray account --format=csv --sort=funcid "`ls basic-mode-dso-* | head -1`" | FileCheck --check-prefix=ACCOUNT %s
+// RUN: rm basic-mode-dso-*
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <unistd.h>
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+  sleep(1);
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  // Explicit patching to ensure the DSO has been loaded
+  __xray_patch();
+  instrumented_in_executable();
+  // CHECK: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+#include <unistd.h>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
+
+// ACCOUNT: funcid,count,min,median,90%ile,99%ile,max,sum,debug,function
+// ACCOUNT-NEXT: 1,1,{{.*}}
+// ACCOUNT-NEXT: 16777217,1,{{.*}}
diff --git a/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp b/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
new file mode 100644
index 0000000000000..d3e3b4e5f2904
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
@@ -0,0 +1,14 @@
+// Test that the DSO-local runtime library has been linked if -fxray-enable-shared is passed.
+//
+// RUN: %clangxx -fxray-instrument -fxray-enable-shared %s -shared -o %t.so
+// RUN: llvm-nm %t.so | FileCheck %s --check-prefix ENABLED
+
+// RUN: %clangxx -fxray-instrument %s -shared -o %t.so
+// RUN: llvm-nm %t.so | FileCheck %s --check-prefix DISABLED
+//
+// REQUIRES: target=x86_64{{.*}}
+
+[[clang::xray_always_instrument]] int always_instrumented() { return 42; }
+
+// ENABLED: __start_xray_instr_map
+// DISABLED-NOT: __start_xray_instr_map
\ No newline at end of file
diff --git a/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
new file mode 100644
index 0000000000000..e7929c018352b
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
@@ -0,0 +1,110 @@
+// Check that we can patch and un-patch DSOs loaded with dlopen.
+//
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -rdynamic -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=true" %run %t/main.o %t/testlib.so 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <dlfcn.h>
+
+bool called = false;
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+  called = true;
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+typedef void (*dso_func_type)();
+
+int main(int argc, char **argv) {
+  if (argc < 2) {
+    printf("Shared library argument missing\n");
+    // CHECK-NOT: Shared library argument missing
+    return 1;
+  }
+
+  const char *dso_path = argv[1];
+
+  void *dso_handle = dlopen(dso_path, RTLD_LAZY);
+  if (!dso_handle) {
+    printf("Failed to load shared library\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+      return 1;
+    }
+    return 1;
+  }
+
+  dso_func_type instrumented_in_dso =
+      (dso_func_type)dlsym(dso_handle, "_Z19instrumented_in_dsov");
+  if (!instrumented_in_dso) {
+    printf("Failed to find symbol\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+      return 1;
+    }
+    return 1;
+  }
+
+  __xray_set_handler(test_handler);
+
+  instrumented_in_executable();
+  // CHECK: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+
+  auto status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+
+  status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+
+  instrumented_in_executable();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+
+  dlclose(dso_handle);
+
+  status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
diff --git a/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
new file mode 100644
index 0000000000000..9e80fc376795a
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/patch-premain-dso.cpp
@@ -0,0 +1,45 @@
+// Checking that DSOs are automatically patched upon load, if patch_premain is passed.
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=true,verbosity=1" %run %t/main.o 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  __xray_set_handler(test_handler);
+  instrumented_in_executable();
+  // CHECK: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}
diff --git a/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp
new file mode 100644
index 0000000000000..900af42c5334c
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching-dso.cpp
@@ -0,0 +1,75 @@
+// Check that we can patch and un-patch on demand, and that logging gets invoked
+// appropriately.
+//
+
+// RUN: split-file %s %t
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o
+
+// RUN: XRAY_OPTIONS="patch_premain=false" %run %t/main.o 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+bool called = false;
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+  called = true;
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+extern void instrumented_in_dso();
+
+int main() {
+  __xray_set_handler(test_handler);
+  instrumented_in_executable();
+  // CHECK: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK: instrumented_in_dso called
+  auto status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  instrumented_in_executable();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  instrumented_in_dso();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: instrumented_in_dso called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  status = __xray_unpatch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+  status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  __xray_remove_handler();
+  instrumented_in_executable();
+  // CHECK-NEXT: instrumented_in_executable called
+  instrumented_in_dso();
+  // CHECK-NEXT: instrumented_in_dso called
+  status = __xray_unpatch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+}
+
+//--- testlib.cpp
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void instrumented_in_dso() {
+  printf("instrumented_in_dso called\n");
+}

>From 920bd4a564682c99fcc9e3e168548c3936ba75dc Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Mon, 6 May 2024 10:18:49 +0200
Subject: [PATCH 2/7] [XRay] Small ObjID fix

---
 compiler-rt/lib/xray/xray_init.cpp      | 2 +-
 compiler-rt/lib/xray/xray_interface.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index c3570d97701ee..74bbe016c856c 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -56,7 +56,7 @@ SpinMutex XRayInstrMapMutex;
 //  Contains maps for the main executable as well as DSOs.
 // std::vector<XRaySledMap> XRayInstrMaps;
 XRaySledMap *XRayInstrMaps;
-atomic_uint32_t XRayNumObjects;
+atomic_uint32_t XRayNumObjects{0};
 
 // Global flag to determine whether the flags have been initialized.
 atomic_uint8_t XRayFlagsInitialized{0};
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index dcd75c83b0b00..5ec94fb9d88ed 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -658,7 +658,7 @@ size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
 
 size_t __xray_max_function_id_in_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Guard(&XRayInstrMapMutex);
-  if (ObjId < 0 || ObjId > atomic_load(&XRayNumObjects, memory_order_acquire))
+  if (ObjId < 0 || ObjId >= atomic_load(&XRayNumObjects, memory_order_acquire))
     return 0;
   return XRayInstrMaps[ObjId].Functions;
 }

>From e03994ad637df5007b8ae3bdb7e87347ee303df2 Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Mon, 6 May 2024 11:10:10 +0200
Subject: [PATCH 3/7] Address reviewer feedback

---
 compiler-rt/lib/xray/xray_init.cpp | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index 74bbe016c856c..b93ad2e172a6d 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -102,9 +102,8 @@ __xray_register_sleds(const XRaySledEntry *SledsBegin,
     return -1;
   }
 
-  if (Verbosity()) {
+  if (Verbosity())
     Report("Registering %d new functions!\n", SledMap.Functions);
-  }
 
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
@@ -145,9 +144,15 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
   // Pre-allocation takes up approx. 5kB for XRayMaxObjects=64.
   XRayInstrMaps = allocateBuffer<XRaySledMap>(XRayMaxObjects);
 
-  __xray_register_sleds(__start_xray_instr_map, __stop_xray_instr_map,
+  int MainBinaryId = __xray_register_sleds(__start_xray_instr_map, __stop_xray_instr_map,
                         __start_xray_fn_idx, __stop_xray_fn_idx, false, {});
 
+  // The executable should always get ID 0.
+  if (MainBinaryId != 0) {
+    Report("Registering XRay sleds failed.\n");
+    return;
+  }
+
   atomic_store(&XRayInitialized, true, memory_order_release);
 
 #ifndef XRAY_NO_PREINIT
@@ -187,8 +192,12 @@ SANITIZER_INTERFACE_ATTRIBUTE int32_t __xray_register_dso(
 
 SANITIZER_INTERFACE_ATTRIBUTE bool
 __xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
-  // Make sure XRay has been initialized in the main executable.
-  __xray_init();
+
+  if (!atomic_load(&XRayInitialized, memory_order_acquire)) {
+    if (Verbosity())
+      Report("XRay has not been initialized. Cannot deregister DSO.\n");
+    return false;
+  }
 
   if (ObjId <= 0 || ObjId >= __xray_num_objects()) {
     if (Verbosity())
@@ -210,8 +219,9 @@ __xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
       if (Verbosity())
         Report("Can't deregister object with ID %d: object is not loaded.\n",
                ObjId);
+      return false;
     }
-    // This is all we have to do here.
+    // Mark DSO as unloaded. No need to unpatch.
     Entry.Loaded = false;
   }
 

>From d52ba2472c9a28c7efc09e64fafc938c9bc02648 Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Mon, 6 May 2024 11:10:59 +0200
Subject: [PATCH 4/7] Minor formatting fixes

---
 compiler-rt/lib/xray/CMakeLists.txt                           | 2 +-
 compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index 165e61b6f4576..2e8517a0a2081 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -371,7 +371,7 @@ else() # not Apple
      LINK_LIBS ${XRAY_LINK_LIBS}
      DEFS ${XRAY_COMMON_DEFINITIONS}
      OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS} RTXray
-     PARENT_TARGET xray)  
+     PARENT_TARGET xray)
     # FDR mode runtime archive (addon for clang_rt.xray)
     add_compiler_rt_runtime(clang_rt.xray-fdr
       STATIC
diff --git a/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp b/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
index d3e3b4e5f2904..dbc0adc568ce5 100644
--- a/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
+++ b/compiler-rt/test/xray/TestCases/Posix/clang-enable-shared.cpp
@@ -11,4 +11,4 @@
 [[clang::xray_always_instrument]] int always_instrumented() { return 42; }
 
 // ENABLED: __start_xray_instr_map
-// DISABLED-NOT: __start_xray_instr_map
\ No newline at end of file
+// DISABLED-NOT: __start_xray_instr_map

>From 6f2a1e1c417bc297a9986bef211c58487ab03100 Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Mon, 6 May 2024 13:19:02 +0200
Subject: [PATCH 5/7] Remove CMake message, address missing review item

---
 compiler-rt/lib/xray/CMakeLists.txt | 1 -
 compiler-rt/lib/xray/xray_init.cpp  | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index 2e8517a0a2081..39b7ad7c0519a 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -328,7 +328,6 @@ if (APPLE)
     LINK_LIBS ${XRAY_LINK_LIBS}
     PARENT_TARGET xray)
 else() # not Apple
-  message("DSO Supported Archs: ${XRAY_DSO_SUPPORTED_ARCH}")
   foreach(arch ${XRAY_SUPPORTED_ARCH})
     if(NOT CAN_TARGET_${arch})
       continue()
diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index b93ad2e172a6d..abe3465366902 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -71,6 +71,7 @@ __xray_register_sleds(const XRaySledEntry *SledsBegin,
                       const XRayFunctionSledIndex *FnIndexEnd, bool FromDSO,
                       XRayTrampolines Trampolines) XRAY_NEVER_INSTRUMENT {
   if (!SledsBegin || !SledsEnd) {
+    Report("Invalid XRay sleds.\n");
     return -1;
   }
   XRaySledMap SledMap;

>From 1c6bfff2dbf5133c8d29e0868aefdb0c4b04d22b Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Mon, 6 May 2024 14:10:02 +0200
Subject: [PATCH 6/7] Fix function index bug

---
 compiler-rt/lib/xray/xray_init.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index abe3465366902..da1c3f70cf573 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -95,7 +95,7 @@ __xray_register_sleds(const XRaySledEntry *SledsBegin,
         LastFnAddr = Function;
       }
     }
-
+    SledMap.SledsIndex = nullptr;
     SledMap.Functions = CountFunctions;
   }
   if (SledMap.Functions >= XRayMaxFunctions) {

>From 917d5fa29d71f4bc66d277fd089fd51887101a2d Mon Sep 17 00:00:00 2001
From: Sebastian Kreutzer <SebastianKreutzer at gmx.net>
Date: Wed, 10 Jul 2024 15:25:59 +0200
Subject: [PATCH 7/7] fixup! [XRay] Add DSO support for XRay instrumentation on
 X86_64

---
 clang/lib/Driver/ToolChains/CommonArgs.cpp    |   4 +-
 clang/lib/Driver/XRayArgs.cpp                 |  14 +-
 clang/test/Driver/XRay/xray-enable-shared.cpp |  17 ++
 compiler-rt/lib/xray/CMakeLists.txt           |  16 +-
 compiler-rt/lib/xray/xray_dso_init.cpp        |   2 +-
 compiler-rt/lib/xray/xray_init.cpp            |  26 ++-
 compiler-rt/lib/xray/xray_interface.cpp       | 142 +++++++------
 .../test/xray/TestCases/Posix/dlopen.cpp      |   5 +-
 .../xray/TestCases/Posix/dso-dep-chains.cpp   | 198 ++++++++++++++++++
 9 files changed, 336 insertions(+), 88 deletions(-)
 create mode 100644 clang/test/Driver/XRay/xray-enable-shared.cpp
 create mode 100644 compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 399bf795ce394..03f2897f94b6f 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1522,9 +1522,9 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
 bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
   if (Args.hasArg(options::OPT_shared)) {
     if (TC.getXRayArgs().needsXRayDSORt()) {
-      CmdArgs.push_back("-whole-archive");
+      CmdArgs.push_back("--whole-archive");
       CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray-dso"));
-      CmdArgs.push_back("-no-whole-archive");
+      CmdArgs.push_back("--no-whole-archive");
       return true;
     }
   } else if (TC.getXRayArgs().needsXRayRt()) {
diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp
index 7809cd7ef7c75..d483ee57bbe41 100644
--- a/clang/lib/Driver/XRayArgs.cpp
+++ b/clang/lib/Driver/XRayArgs.cpp
@@ -64,9 +64,21 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
   }
 
   if (Args.hasFlag(options::OPT_fxray_enable_shared,
-                   options::OPT_fno_xray_enable_shared, false))
+                   options::OPT_fno_xray_enable_shared, false)) {
     XRayEnableShared = true;
 
+    // DSO instrumentation is currently limited to x86_64
+    if (Triple.getArch() != llvm::Triple::x86_64) {
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << "-fxray-enable-shared" << Triple.str();
+    }
+
+    unsigned PICLvl = std::get<1>(tools::ParsePICArgs(TC, Args));
+    if (!PICLvl) {
+      D.Diag(diag::err_opt_not_valid_without_opt) << "-fxray-enable-shared" << "-fPIC";
+    }
+  }
+
   // Both XRay and -fpatchable-function-entry use
   // TargetOpcode::PATCHABLE_FUNCTION_ENTER.
   if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ))
diff --git a/clang/test/Driver/XRay/xray-enable-shared.cpp b/clang/test/Driver/XRay/xray-enable-shared.cpp
new file mode 100644
index 0000000000000..8722c24b2f00a
--- /dev/null
+++ b/clang/test/Driver/XRay/xray-enable-shared.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-enable-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fpic -fxray-instrument -fxray-enable-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fxray-instrument -fxray-enable-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-PIC -fxray-instrument -fxray-enable-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-pic -fxray-instrument -fxray-enable-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
+
+// On 64 bit darwin, PIC is always enabled
+// RUN: %clang -### --target=x86_64-apple-darwin -fxray-instrument -fxray-enable-shared -c %s -o /dev/null 2>&1 | FileCheck %s
+
+// Check unsupported targets
+// RUN: not %clang -### --target=aarch64-pc-freebsd -fPIC -fxray-instrument -fxray-enable-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
+// RUN: not %clang -### --target=arm64-apple-macos -fPIC -fxray-instrument -fxray-enable-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
+
+// CHECK: "-cc1" {{.*}}"-fxray-instrument" {{.*}}"-fxray-enable-shared"
+// ERR-TARGET:   error: unsupported option '-fxray-enable-shared' for target
+// ERR-PIC:   error: option '-fxray-enable-shared' cannot be specified without '-fPIC'
+
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index 39b7ad7c0519a..f38c07420c9ab 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -393,14 +393,14 @@ else() # not Apple
       PARENT_TARGET xray)
     # Profiler Mode runtime
     add_compiler_rt_runtime(clang_rt.xray-profiling
-     STATIC
-     ARCHS ${arch}
-     CFLAGS ${XRAY_CFLAGS}
-     LINK_FLAGS ${XRAY_LINK_FLAGS}
-     LINK_LIBS ${XRAY_LINK_LIBS}
-     DEFS ${XRAY_COMMON_DEFINITIONS}
-     OBJECT_LIBS RTXrayPROFILING
-     PARENT_TARGET xray)
+      STATIC
+      ARCHS ${arch}
+      CFLAGS ${XRAY_CFLAGS}
+      LINK_FLAGS ${XRAY_LINK_FLAGS}
+      LINK_LIBS ${XRAY_LINK_LIBS}
+      DEFS ${XRAY_COMMON_DEFINITIONS}
+      OBJECT_LIBS RTXrayPROFILING
+      PARENT_TARGET xray)
 
     if (${arch} IN_LIST XRAY_DSO_SUPPORTED_ARCH)
       # TODO: Only implemented for X86 at the moment
diff --git a/compiler-rt/lib/xray/xray_dso_init.cpp b/compiler-rt/lib/xray/xray_dso_init.cpp
index ad5f91f3f2448..eb754db54c64f 100644
--- a/compiler-rt/lib/xray/xray_dso_init.cpp
+++ b/compiler-rt/lib/xray/xray_dso_init.cpp
@@ -28,7 +28,7 @@ __attribute__((visibility("hidden")));
 extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak))
 __attribute__((visibility("hidden")));
 
-#if SANITIZER_MAC
+#if SANITIZER_APPLE
 // HACK: This is a temporary workaround to make XRay build on
 // Darwin, but it will probably not work at runtime.
 extern const XRaySledEntry __start_xray_instr_map[] = {};
diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp
index da1c3f70cf573..9db9f529e4004 100644
--- a/compiler-rt/lib/xray/xray_init.cpp
+++ b/compiler-rt/lib/xray/xray_init.cpp
@@ -45,17 +45,15 @@ using namespace __xray;
 // the weak symbols defined above (__start_xray_inst_map and
 // __stop_xray_instr_map) to initialise the instrumentation map that XRay uses
 // for runtime patching/unpatching of instrumentation points.
-//
-// FIXME: Support DSO instrumentation maps too. The current solution only works
-// for statically linked executables.
 atomic_uint8_t XRayInitialized{0};
 
 // This should always be updated before XRayInitialized is updated.
 SpinMutex XRayInstrMapMutex;
-// XRaySledMap XRayInstrMap;
+
 //  Contains maps for the main executable as well as DSOs.
-// std::vector<XRaySledMap> XRayInstrMaps;
 XRaySledMap *XRayInstrMaps;
+
+// Number of binary objects registered.
 atomic_uint32_t XRayNumObjects{0};
 
 // Global flag to determine whether the flags have been initialized.
@@ -64,6 +62,9 @@ atomic_uint8_t XRayFlagsInitialized{0};
 // A mutex to allow only one thread to initialize the XRay data structures.
 SpinMutex XRayInitMutex;
 
+// Registers XRay sleds and trampolines coming from the main executable or one
+// of the linked DSOs.
+// Returns the object ID if registration is successful, -1 otherwise.
 int32_t
 __xray_register_sleds(const XRaySledEntry *SledsBegin,
                       const XRaySledEntry *SledsEnd,
@@ -145,8 +146,9 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
   // Pre-allocation takes up approx. 5kB for XRayMaxObjects=64.
   XRayInstrMaps = allocateBuffer<XRaySledMap>(XRayMaxObjects);
 
-  int MainBinaryId = __xray_register_sleds(__start_xray_instr_map, __stop_xray_instr_map,
-                        __start_xray_fn_idx, __stop_xray_fn_idx, false, {});
+  int MainBinaryId =
+      __xray_register_sleds(__start_xray_instr_map, __stop_xray_instr_map,
+                            __start_xray_fn_idx, __stop_xray_fn_idx, false, {});
 
   // The executable should always get ID 0.
   if (MainBinaryId != 0) {
@@ -162,6 +164,9 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
 #endif
 }
 
+// Registers XRay sleds and trampolines of an instrumented DSO.
+// Returns the object ID if registration is successful, -1 otherwise.
+//
 // Default visibility is hidden, so we have to explicitly make it visible to
 // DSO.
 SANITIZER_INTERFACE_ATTRIBUTE int32_t __xray_register_dso(
@@ -191,6 +196,11 @@ SANITIZER_INTERFACE_ATTRIBUTE int32_t __xray_register_dso(
   return ObjId;
 }
 
+// Deregisters a DSO from the main XRay runtime.
+// Called from the DSO-local runtime when the library is unloaded (e.g. if
+// dlclose is called).
+// Returns true if the object ID is valid and the DSO was successfully
+// deregistered.
 SANITIZER_INTERFACE_ATTRIBUTE bool
 __xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
 
@@ -220,7 +230,7 @@ __xray_deregister_dso(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
       if (Verbosity())
         Report("Can't deregister object with ID %d: object is not loaded.\n",
                ObjId);
-      return false;
+      return true;
     }
     // Mark DSO as unloaded. No need to unpatch.
     Entry.Loaded = false;
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 5ec94fb9d88ed..9126495027b20 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "xray_interface_internal.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #include <cinttypes>
 #include <cstdio>
@@ -289,24 +290,9 @@ XRayPatchingStatus patchFunction(int32_t FuncId, int32_t ObjId,
 // controlPatching implements the common internals of the patching/unpatching
 // implementation. |Enable| defines whether we're enabling or disabling the
 // runtime XRay instrumentation.
-XRayPatchingStatus controlPatching(bool Enable,
-                                   int32_t ObjId) XRAY_NEVER_INSTRUMENT {
-
-  if (!atomic_load(&XRayInitialized, memory_order_acquire))
-    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
-
-  uint8_t NotPatching = false;
-  if (!atomic_compare_exchange_strong(
-          &XRayPatching, &NotPatching, true, memory_order_acq_rel))
-    return XRayPatchingStatus::ONGOING; // Already patching.
-
-  uint8_t PatchingSuccess = false;
-  auto XRayPatchingStatusResetter =
-      at_scope_exit([&PatchingSuccess] {
-        if (!PatchingSuccess)
-          atomic_store(&XRayPatching, false, memory_order_release);
-      });
-
+// This function should only be called after ensuring that XRay is initialized
+// and no other thread is currently patching.
+XRayPatchingStatus controlPatchingObjectUnchecked(bool Enable, int32_t ObjId) {
   XRaySledMap InstrMap;
   {
     SpinMutexLock Guard(&XRayInstrMapMutex);
@@ -382,10 +368,80 @@ XRayPatchingStatus controlPatching(bool Enable,
     patchSled(Sled, Enable, PackedId, InstrMap.Trampolines);
   }
   atomic_store(&XRayPatching, false, memory_order_release);
-  PatchingSuccess = true;
   return XRayPatchingStatus::SUCCESS;
 }
 
+
+// Controls patching for all registered objects.
+// Returns: SUCCESS, if patching succeeds for all objects.
+//          NOT_INITIALIZED, if one or more objects returned NOT_INITIALIZED
+//             but none failed.
+//          FAILED, if patching of one or more objects failed.
+XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(
+          &XRayPatching, &NotPatching, true, memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  auto XRayPatchingStatusResetter =
+      at_scope_exit([] {
+        atomic_store(&XRayPatching, false, memory_order_release);
+      });
+
+  unsigned NumObjects = __xray_num_objects();
+
+  XRayPatchingStatus CombinedStatus{NOT_INITIALIZED};
+  for (unsigned I = 0; I < NumObjects; ++I) {
+    if (!isObjectLoaded(I))
+      continue;
+    auto LastStatus = controlPatchingObjectUnchecked(Enable, I);
+    switch (LastStatus) {
+    case SUCCESS:
+      if (CombinedStatus == NOT_INITIALIZED)
+        CombinedStatus = SUCCESS;
+      break;
+    case FAILED:
+      // Report failure, but try to patch the remaining objects
+      CombinedStatus = FAILED;
+      break;
+    case NOT_INITIALIZED:
+      // XRay has been initialized but there are no sleds available for this
+      // object. Try to patch remaining objects.
+      if (CombinedStatus != FAILED)
+        CombinedStatus = NOT_INITIALIZED;
+      break;
+    case ONGOING:
+      llvm_unreachable("Status ONGOING should not appear at this point");
+    default:
+      llvm_unreachable("Unhandled patching status");
+    }
+  }
+  return CombinedStatus;
+}
+
+// Controls patching for one object.
+XRayPatchingStatus controlPatching(bool Enable,
+                                   int32_t ObjId) XRAY_NEVER_INSTRUMENT {
+
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(
+          &XRayPatching, &NotPatching, true, memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  auto XRayPatchingStatusResetter =
+      at_scope_exit([] {
+          atomic_store(&XRayPatching, false, memory_order_release);
+      });
+
+  return controlPatchingObjectUnchecked(Enable, ObjId);
+}
+
 XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, int32_t ObjId,
                                             bool Enable) XRAY_NEVER_INSTRUMENT {
   XRaySledMap InstrMap;
@@ -420,7 +476,7 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, int32_t ObjId,
     return XRayPatchingStatus::FAILED;
   }
 
-  // Here we compute the minumum sled and maximum sled associated with a
+  // Here we compute the minimum sled and maximum sled associated with a
   // particular function ID.
   XRayFunctionSledIndex SledRange;
   if (InstrMap.SledsIndex) {
@@ -518,28 +574,7 @@ uint16_t __xray_register_event_type(
 }
 
 XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
-  XRayPatchingStatus CombinedStatus{SUCCESS};
-  for (size_t I = 0; I < __xray_num_objects(); ++I) {
-    if (!isObjectLoaded(I))
-      continue;
-    auto LastStatus = controlPatching(true, I);
-    switch (LastStatus) {
-    case FAILED:
-      CombinedStatus = FAILED;
-      break;
-    case NOT_INITIALIZED:
-      if (CombinedStatus != FAILED)
-        CombinedStatus = NOT_INITIALIZED;
-      break;
-    case ONGOING:
-      if (CombinedStatus != FAILED && CombinedStatus != NOT_INITIALIZED)
-        CombinedStatus = ONGOING;
-      break;
-    default:
-      break;
-    }
-  }
-  return CombinedStatus;
+  return controlPatching(true);
 }
 
 XRayPatchingStatus __xray_patch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
@@ -547,28 +582,7 @@ XRayPatchingStatus __xray_patch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
 }
 
 XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
-  XRayPatchingStatus CombinedStatus{SUCCESS};
-  for (size_t I = 0; I < __xray_num_objects(); ++I) {
-    if (!isObjectLoaded(I))
-      continue;
-    auto LastStatus = controlPatching(false, I);
-    switch (LastStatus) {
-    case FAILED:
-      CombinedStatus = FAILED;
-      break;
-    case NOT_INITIALIZED:
-      if (CombinedStatus != FAILED)
-        CombinedStatus = NOT_INITIALIZED;
-      break;
-    case ONGOING:
-      if (CombinedStatus != FAILED && CombinedStatus != NOT_INITIALIZED)
-        CombinedStatus = ONGOING;
-      break;
-    default:
-      break;
-    }
-  }
-  return CombinedStatus;
+  return controlPatching(false);
 }
 
 XRayPatchingStatus __xray_unpatch_object(int32_t ObjId) XRAY_NEVER_INSTRUMENT {
diff --git a/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
index e7929c018352b..a370daa793ebc 100644
--- a/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
+++ b/compiler-rt/test/xray/TestCases/Posix/dlopen.cpp
@@ -4,7 +4,7 @@
 // RUN: split-file %s %t
 // RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so
 // RUN: %clangxx_xray -g -fPIC -rdynamic -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp -o %t/main.o
-
+//
 // RUN: XRAY_OPTIONS="patch_premain=true" %run %t/main.o %t/testlib.so 2>&1 | FileCheck %s
 
 // REQUIRES: target=x86_64{{.*}}
@@ -16,11 +16,8 @@
 #include <cstdio>
 #include <dlfcn.h>
 
-bool called = false;
-
 void test_handler(int32_t fid, XRayEntryType type) {
   printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
-  called = true;
 }
 
 [[clang::xray_always_instrument]] void instrumented_in_executable() {
diff --git a/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp b/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp
new file mode 100644
index 0000000000000..f2cc8bd4be5a3
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Posix/dso-dep-chains.cpp
@@ -0,0 +1,198 @@
+// Check that loading libraries with different modes (RTLD_LOCAL/RTLD_GLOBAL)
+// and dependencies on other DSOs work correctly.
+//
+
+// RUN: split-file %s %t
+//
+// Build shared libs with dependencies b->c and e->f
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testliba.cpp -o %t/testliba.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlibc.cpp -o %t/testlibc.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlibb.cpp %t/testlibc.so -o %t/testlibb.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlibd.cpp -o %t/testlibd.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlibf.cpp -o %t/testlibf.so
+// RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-enable-shared -shared -std=c++11 %t/testlibe.cpp %t/testlibf.so -o %t/testlibe.so
+//
+// Executable links with a and b explicitly and loads d and e at runtime.
+// RUN: %clangxx_xray -g -fPIC -rdynamic -fxray-instrument -fxray-enable-shared -std=c++11 %t/main.cpp %t/testliba.so %t/testlibb.so -o %t/main.o
+//
+// RUN:  XRAY_OPTIONS="patch_premain=true" %run %t/main.o %t/testlibd.so %t/testlibe.so 2>&1 | FileCheck %s
+
+// REQUIRES: target=x86_64{{.*}}
+
+//--- main.cpp
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+#include <dlfcn.h>
+
+[[clang::xray_never_instrument]] void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, object=%d, fn=%d, type=%d\n", fid, (fid >> 24) & 0xFF, fid & 0x00FFFFFF,  static_cast<int32_t>(type));
+}
+
+[[clang::xray_always_instrument]] void instrumented_in_executable() {
+  printf("instrumented_in_executable called\n");
+}
+
+typedef void (*dso_func_type)();
+
+[[clang::xray_never_instrument]] void* load_dso(const char* path, int mode) {
+  void *dso_handle = dlopen(path, mode);
+  if (!dso_handle) {
+    printf("failed to load shared library\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+    }
+    return nullptr;
+  }
+  return dso_handle;
+}
+
+[[clang::xray_never_instrument]] void find_and_call(void* dso_handle, const char* fn) {
+  dso_func_type dso_fn =
+      (dso_func_type)dlsym(dso_handle, fn);
+  if (!dso_fn) {
+    printf("failed to find symbol\n");
+    char *error = dlerror();
+    if (error) {
+      fprintf(stderr, "%s\n", error);
+    }
+    return;
+  }
+  dso_fn();
+}
+
+extern void a();
+extern void b();
+
+int main(int argc, char **argv) {
+
+  if (argc < 3) {
+    printf("Shared library arguments missing\n");
+    // CHECK-NOT: Shared library arguments missing
+    return 1;
+  }
+
+  const char *dso_path_d = argv[1];
+  const char *dso_path_e = argv[2];
+
+  __xray_set_handler(test_handler);
+
+  instrumented_in_executable();
+  // CHECK: called: {{[0-9]+}}, object=0, fn={{[0-9]+}}, type=0
+  // CHECK-NEXT: instrumented_in_executable called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=0, fn={{[0-9]+}}, type=1
+
+  a();
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ1:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: a called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ1]], fn=1, type=1
+
+  // Make sure this object ID does not appear again
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ1]]
+
+  b(); // b calls c
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ2:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: b called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ3:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: c called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ3]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ3]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ2]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ2]]
+
+  // Now check explicit loading with RTLD_LOCAL
+
+  void *dso_handle_d = load_dso(dso_path_d, RTLD_LAZY | RTLD_LOCAL);
+  void *dso_handle_e = load_dso(dso_path_e, RTLD_LAZY | RTLD_LOCAL);
+  // CHECK-NOT: failed to load shared library
+
+  find_and_call(dso_handle_d, "_Z1dv");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ4:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: d called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ4]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ4]]
+
+  find_and_call(dso_handle_e, "_Z1ev");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ5:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: e called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ6:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: f called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ6]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ6]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ5]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ5]]
+
+  // Unload DSOs
+  dlclose(dso_handle_d);
+  dlclose(dso_handle_e);
+
+  // Repeat test with RTLD_GLOBAL
+  dso_handle_d = load_dso(dso_path_d, RTLD_LAZY | RTLD_GLOBAL);
+  dso_handle_e = load_dso(dso_path_e, RTLD_LAZY | RTLD_GLOBAL);
+  // CHECK-NOT: failed to load shared library
+
+  find_and_call(dso_handle_d, "_Z1dv");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ7:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: d called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ7]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ7]]
+
+  find_and_call(dso_handle_e, "_Z1ev");
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ8:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: e called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ9:[0-9]+]], fn=1, type=0
+  // CHECK-NEXT: f called
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ9]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ9]]
+  // CHECK-NEXT: called: {{[0-9]+}}, object=[[OBJ8]], fn=1, type=1
+  // CHECK-NOT: called: {{[0-9]+}}, object=[[OBJ8]]
+
+  auto status = __xray_unpatch();
+  printf("unpatching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: unpatching status: 1
+
+  dlclose(dso_handle_d);
+  dlclose(dso_handle_e);
+
+}
+
+//--- libgenmacro.inc
+#include <cstdio>
+// Helper macros to quickly generate libraries containing a single function.
+#define GENERATE_LIB(NAME)                        \
+  [[clang::xray_always_instrument]] void NAME() { \
+    printf(#NAME " called\n");                    \
+  }
+
+#define GENERATE_LIB_WITH_CALL(NAME, FN)          \
+  extern void FN();                               \
+  [[clang::xray_always_instrument]] void NAME() { \
+    printf(#NAME " called\n");                    \
+    FN();                                         \
+  }
+
+//--- testliba.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(a)
+
+//--- testlibb.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB_WITH_CALL(b, c)
+
+//--- testlibc.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(c)
+
+//--- testlibd.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(d)
+
+//--- testlibe.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB_WITH_CALL(e, f)
+
+//--- testlibf.cpp
+#include "libgenmacro.inc"
+GENERATE_LIB(f)



More information about the llvm-commits mailing list