[compiler-rt] r280969 - Revert "[XRay] ARM 32-bit no-Thumb support in compiler-rt"

Renato Golin via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 8 10:13:15 PDT 2016


Author: rengolin
Date: Thu Sep  8 12:13:15 2016
New Revision: 280969

URL: http://llvm.org/viewvc/llvm-project?rev=280969&view=rev
Log:
Revert "[XRay] ARM 32-bit no-Thumb support in compiler-rt"

This reverts commit r280890, as the related LLVM commit broke the thumb bots.

Removed:
    compiler-rt/trunk/lib/xray/xray_arm.cc
    compiler-rt/trunk/lib/xray/xray_trampoline_arm.S
    compiler-rt/trunk/lib/xray/xray_x86_64.cc
Modified:
    compiler-rt/trunk/cmake/config-ix.cmake
    compiler-rt/trunk/lib/sanitizer_common/scripts/gen_dynamic_list.py
    compiler-rt/trunk/lib/xray/CMakeLists.txt
    compiler-rt/trunk/lib/xray/xray_inmemory_log.cc
    compiler-rt/trunk/lib/xray/xray_interface.cc
    compiler-rt/trunk/lib/xray/xray_interface_internal.h

Modified: compiler-rt/trunk/cmake/config-ix.cmake
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/cmake/config-ix.cmake?rev=280969&r1=280968&r2=280969&view=diff
==============================================================================
--- compiler-rt/trunk/cmake/config-ix.cmake (original)
+++ compiler-rt/trunk/cmake/config-ix.cmake Thu Sep  8 12:13:15 2016
@@ -161,7 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86}
 set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
 set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
 set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
-set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
 
 if(APPLE)
   include(CompilerRTDarwinUtils)

Modified: compiler-rt/trunk/lib/sanitizer_common/scripts/gen_dynamic_list.py
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/scripts/gen_dynamic_list.py?rev=280969&r1=280968&r2=280969&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/scripts/gen_dynamic_list.py (original)
+++ compiler-rt/trunk/lib/sanitizer_common/scripts/gen_dynamic_list.py Thu Sep  8 12:13:15 2016
@@ -19,7 +19,6 @@ import os
 import re
 import subprocess
 import sys
-import platform
 
 new_delete = set([
                   '_Znam', '_ZnamRKSt9nothrow_t',    # operator new[](unsigned long)
@@ -51,7 +50,7 @@ def get_global_functions(library):
     raise subprocess.CalledProcessError(nm_proc.returncode, nm)
   func_symbols = ['T', 'W']
   # On PowerPC, nm prints function descriptors from .data section.
-  if platform.uname()[4] in ["powerpc", "ppc64"]:
+  if os.uname()[4] in ["powerpc", "ppc64"]:
     func_symbols += ['D']
   for line in nm_out:
     cols = line.split(' ')

Modified: compiler-rt/trunk/lib/xray/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/CMakeLists.txt?rev=280969&r1=280968&r2=280969&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/CMakeLists.txt (original)
+++ compiler-rt/trunk/lib/xray/CMakeLists.txt Thu Sep  8 12:13:15 2016
@@ -8,17 +8,9 @@ set(XRAY_SOURCES
 )
 
 set(x86_64_SOURCES
-		xray_x86_64.cc
 		xray_trampoline_x86_64.S
 		${XRAY_SOURCES})
 
-set(arm_SOURCES
-		xray_arm.cc
-		xray_trampoline_arm.S
-		${XRAY_SOURCES})
-
-set(armhf_SOURCES ${arm_SOURCES})
-
 include_directories(..)
 include_directories(../../include)
 

Removed: compiler-rt/trunk/lib/xray/xray_arm.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_arm.cc?rev=280968&view=auto
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_arm.cc (original)
+++ compiler-rt/trunk/lib/xray/xray_arm.cc (removed)
@@ -1,131 +0,0 @@
-//===-- xray_arm.cpp --------------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of XRay, a dynamic runtime instrumentation system.
-//
-// Implementation of ARM-specific routines (32-bit).
-//
-//===----------------------------------------------------------------------===//
-#include "xray_interface_internal.h"
-#include "sanitizer_common/sanitizer_common.h"
-#include <atomic>
-#include <cassert>
-
-namespace __xray {
-
-// The machine codes for some instructions used in runtime patching.
-enum class PatchOpcodes : uint32_t
-{
-  PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
-  PO_BlxIp = 0xE12FFF3C, // BLX ip
-  PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr}
-  PO_B20 = 0xEA000005 // B #20
-};
-
-// 0xUUUUWXYZ -> 0x000W0XYZ
-inline static uint32_t getMovwMask(const uint32_t Value) {
-  return (Value & 0xfff) | ((Value & 0xf000) << 4);
-}
-
-// 0xWXYZUUUU -> 0x000W0XYZ
-inline static uint32_t getMovtMask(const uint32_t Value) {
-  return getMovwMask(Value >> 16);
-}
-
-// Writes the following instructions:
-//   MOVW R<regNo>, #<lower 16 bits of the |Value|>
-//   MOVT R<regNo>, #<higher 16 bits of the |Value|>
-inline static uint32_t* write32bitLoadReg(uint8_t regNo, uint32_t* Address,
-    const uint32_t Value) {
-  //This is a fatal error: we cannot just report it and continue execution.
-  assert(regNo <= 15 && "Register number must be 0 to 15.");
-  // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ
-  *Address = (0xE3000000 | (uint32_t(regNo)<<12) | getMovwMask(Value));
-  Address++;
-  // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ
-  *Address = (0xE3400000 | (uint32_t(regNo)<<12) | getMovtMask(Value));
-  return Address + 1;
-}
-
-// Writes the following instructions:
-//   MOVW r0, #<lower 16 bits of the |Value|>
-//   MOVT r0, #<higher 16 bits of the |Value|>
-inline static uint32_t *Write32bitLoadR0(uint32_t *Address,
-                                         const uint32_t Value) {
-  return write32bitLoadReg(0, Address, Value);
-}
-
-// Writes the following instructions:
-//   MOVW ip, #<lower 16 bits of the |Value|>
-//   MOVT ip, #<higher 16 bits of the |Value|>
-inline static uint32_t *Write32bitLoadIP(uint32_t *Address,
-                                         const uint32_t Value) {
-  return write32bitLoadReg(12, Address, Value);
-}
-
-inline static bool patchSled(const bool Enable, const uint32_t FuncId,
-                             const XRaySledEntry &Sled, void (*TracingHook)()) {
-  // When |Enable| == true,
-  // We replace the following compile-time stub (sled):
-  //
-  // xray_sled_n:
-  //   B #20
-  //   6 NOPs (24 bytes)
-  //
-  // With the following runtime patch:
-  //
-  // xray_sled_n:
-  //   PUSH {r0, lr}
-  //   MOVW r0, #<lower 16 bits of function ID>
-  //   MOVT r0, #<higher 16 bits of function ID>
-  //   MOVW ip, #<lower 16 bits of address of TracingHook>
-  //   MOVT ip, #<higher 16 bits of address of TracingHook>
-  //   BLX ip
-  //   POP {r0, lr}
-  //
-  // Replacement of the first 4-byte instruction should be the last and atomic
-  // operation, so that the user code which reaches the sled concurrently
-  // either jumps over the whole sled, or executes the whole sled when the
-  // latter is ready.
-  //
-  // When |Enable|==false, we set back the first instruction in the sled to be
-  //   B #20
-
-  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
-  if (Enable) {
-    uint32_t *CurAddress = FirstAddress + 1;
-    CurAddress =
-        Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
-    CurAddress =
-        Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
-    *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
-    CurAddress++;
-    *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
-    std::atomic_store_explicit(
-        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
-        uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
-  } else {
-    std::atomic_store_explicit(
-        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
-        uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
-  }
-  return true;
-}
-
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
-                        const XRaySledEntry &Sled) {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
-}
-
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled) {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
-}
-
-} // namespace __xray

Modified: compiler-rt/trunk/lib/xray/xray_inmemory_log.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_inmemory_log.cc?rev=280969&r1=280968&r2=280969&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_inmemory_log.cc (original)
+++ compiler-rt/trunk/lib/xray/xray_inmemory_log.cc Thu Sep  8 12:13:15 2016
@@ -24,14 +24,7 @@
 #include <sys/types.h>
 #include <thread>
 #include <unistd.h>
-
-#if defined(__x86_64__)
-  #include <x86intrin.h>
-#elif defined(__arm__)
-  static const int64_t NanosecondsPerSecond = 1000LL*1000*1000;
-#else
-  #error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
+#include <x86intrin.h>
 
 #include "sanitizer_common/sanitizer_libc.h"
 #include "xray/xray_records.h"
@@ -68,7 +61,6 @@ static void retryingWriteAll(int Fd, cha
   }
 }
 
-#if defined(__x86_64__)
 static std::pair<ssize_t, bool> retryingReadSome(int Fd, char *Begin,
                                                  char *End) {
   auto BytesToRead = std::distance(Begin, End);
@@ -111,8 +103,6 @@ static bool readValueFromFile(const char
   return Result;
 }
 
-#endif /* CPU architecture */
-
 class ThreadExitFlusher {
   int Fd;
   XRayRecord *Start;
@@ -174,7 +164,6 @@ void __xray_InMemoryRawLog(int32_t FuncI
 
     // Get the cycle frequency from SysFS on Linux.
     long long CPUFrequency = -1;
-#if defined(__x86_64__)
     if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
                           &CPUFrequency)) {
       CPUFrequency *= 1000;
@@ -185,20 +174,6 @@ void __xray_InMemoryRawLog(int32_t FuncI
     } else {
       Report("Unable to determine CPU frequency for TSC accounting.");
     }
-#elif defined(__arm__)
-    // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
-    //   not have a constant frequency like TSC on x86(_64), it may go faster
-    //   or slower depending on CPU turbo or power saving mode. Furthermore,
-    //   to read from CP15 on ARM a kernel modification or a driver is needed.
-    //   We can not require this from users of compiler-rt.
-    // So on ARM we use clock_gettime() which gives the result in nanoseconds.
-    //   To get the measurements per second, we scale this by the number of
-    //   nanoseconds per second, pretending that the TSC frequency is 1GHz and
-    //   one TSC tick is 1 nanosecond.
-    CPUFrequency = NanosecondsPerSecond;
-#else
-  #error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
 
     // Since we're here, we get to write the header. We set it up so that the
     // header will only be written once, at the start, and let the threads
@@ -226,29 +201,10 @@ void __xray_InMemoryRawLog(int32_t FuncI
   // First we get the useful data, and stuff it into the already aligned buffer
   // through a pointer offset.
   auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
+  unsigned CPU;
   R.RecordType = RecordTypes::NORMAL;
-#if defined(__x86_64__)
-  {
-    unsigned CPU;
-    R.TSC = __rdtscp(&CPU);
-    R.CPU = CPU;
-  }
-#elif defined(__arm__)
-  {
-    timespec TS;
-    int result = clock_gettime(CLOCK_REALTIME, &TS);
-    if(result != 0)
-    {
-      Report("clock_gettime() returned %d, errno=%d.", result, int(errno));
-      TS.tv_sec = 0;
-      TS.tv_nsec = 0;
-    }
-    R.TSC = TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
-    R.CPU = 0;
-  }
-#else
-  #error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
+  R.TSC = __rdtscp(&CPU);
+  R.CPU = CPU;
   R.TId = TId;
   R.Type = Type;
   R.FuncId = FuncId;

Modified: compiler-rt/trunk/lib/xray/xray_interface.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_interface.cc?rev=280969&r1=280968&r2=280969&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_interface.cc (original)
+++ compiler-rt/trunk/lib/xray/xray_interface.cc Thu Sep  8 12:13:15 2016
@@ -26,15 +26,6 @@
 
 namespace __xray {
 
-#if defined(__x86_64__)
-  // FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect() ?
-  static const int16_t cSledLength = 12;
-#elif defined(__arm__)
-  static const int16_t cSledLength = 28;
-#else
-  #error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
-
 // This is the function to call when we encounter the entry or exit sleds.
 std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
 
@@ -73,6 +64,13 @@ public:
 
 } // namespace __xray
 
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+}
+
 extern std::atomic<bool> XRayInitialized;
 extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
 
@@ -135,13 +133,12 @@ XRayPatchingStatus ControlPatching(bool
   if (InstrMap.Entries == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
-  const uint64_t PageSize = GetPageSizeCached();
-  if((PageSize == 0) || ( (PageSize & (PageSize-1)) != 0) ) {
-    Report("System page size is not a power of two: %lld", PageSize);
-    return XRayPatchingStatus::FAILED;
-  }
-
-  uint32_t FuncId = 1;
+  int32_t FuncId = 1;
+  static constexpr uint8_t CallOpCode = 0xe8;
+  static constexpr uint16_t MovR10Seq = 0xba41;
+  static constexpr uint16_t Jmp9Seq = 0x09eb;
+  static constexpr uint8_t JmpOpCode = 0xe9;
+  static constexpr uint8_t RetOpCode = 0xc3;
   uint64_t CurFun = 0;
   for (std::size_t I = 0; I < InstrMap.Entries; I++) {
     auto Sled = InstrMap.Sleds[I];
@@ -156,28 +153,112 @@ XRayPatchingStatus ControlPatching(bool
     // While we're here, we should patch the nop sled. To do that we mprotect
     // the page containing the function to be writeable.
     void *PageAlignedAddr =
-        reinterpret_cast<void *>(Sled.Address & ~(PageSize-1));
+        reinterpret_cast<void *>(Sled.Address & ~((2 << 16) - 1));
     std::size_t MProtectLen =
-        (Sled.Address + cSledLength) - reinterpret_cast<uint64_t>(PageAlignedAddr);
+        (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr);
     MProtectHelper Protector(PageAlignedAddr, MProtectLen);
     if (Protector.MakeWriteable() == -1) {
       printf("Failed mprotect: %d\n", errno);
       return XRayPatchingStatus::FAILED;
     }
 
-    bool Success = false;
-    switch(Sled.Kind) {
-    case XRayEntryType::ENTRY:
-      Success = patchFunctionEntry(Enable, FuncId, Sled);
-      break;
-    case XRayEntryType::EXIT:
-      Success = patchFunctionExit(Enable, FuncId, Sled);
-      break;
-    default:
-      Report("Unsupported sled kind: %d", int(Sled.Kind));
-      continue;
+    static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+    static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+    if (Sled.Kind == XRayEntryType::ENTRY) {
+      // FIXME: Implement this in a more extensible manner, per-platform.
+      // Here we do the dance of replacing the following sled:
+      //
+      // xray_sled_n:
+      //   jmp +9
+      //   <9 byte nop>
+      //
+      // With the following:
+      //
+      //   mov r10d, <function id>
+      //   call <relative 32bit offset to entry trampoline>
+      //
+      // We need to do this in the following order:
+      //
+      // 1. Put the function id first, 2 bytes from the start of the sled (just
+      // after the 2-byte jmp instruction).
+      // 2. Put the call opcode 6 bytes from the start of the sled.
+      // 3. Put the relative offset 7 bytes from the start of the sled.
+      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+      // opcode and first operand.
+      //
+      // Prerequisite is to compute the relative offset to the
+      // __xray_FunctionEntry function's address.
+      int64_t TrampolineOffset =
+          reinterpret_cast<int64_t>(__xray_FunctionEntry) -
+          (static_cast<int64_t>(Sled.Address) + 11);
+      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+        Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
+               "%ld\n",
+               __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address),
+               TrampolineOffset);
+        continue;
+      }
+      if (Enable) {
+        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+            std::memory_order_release);
+      } else {
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+            std::memory_order_release);
+        // FIXME: Write out the nops still?
+      }
+    }
+
+    if (Sled.Kind == XRayEntryType::EXIT) {
+      // FIXME: Implement this in a more extensible manner, per-platform.
+      // Here we do the dance of replacing the following sled:
+      //
+      // xray_sled_n:
+      //   ret
+      //   <10 byte nop>
+      //
+      // With the following:
+      //
+      //   mov r10d, <function id>
+      //   jmp <relative 32bit offset to exit trampoline>
+      //
+      // 1. Put the function id first, 2 bytes from the start of the sled (past
+      // the 1-byte ret instruction and the first nop byte).
+      // 2. Put the jmp opcode 6 bytes from the start of the sled.
+      // 3. Put the relative offset 7 bytes from the start of the sled.
+      // 4. Do an atomic write over the ret instruction and the first nop byte
+      // for the "mov r10d" opcode and first operand.
+      //
+      // Prerequisite is to compute the relative offset to the
+      // __xray_FunctionExit function's address.
+      int64_t TrampolineOffset =
+          reinterpret_cast<int64_t>(__xray_FunctionExit) -
+          (static_cast<int64_t>(Sled.Address) + 11);
+      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+        Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
+               "%ld\n",
+               __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
+               TrampolineOffset);
+        continue;
+      }
+      if (Enable) {
+        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
+        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+            std::memory_order_release);
+      } else {
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
+            std::memory_order_release);
+        // FIXME: Write out the nops still?
+      }
     }
-    (void)Success;
   }
   XRayPatching.store(false, std::memory_order_release);
   PatchingSuccess = true;

Modified: compiler-rt/trunk/lib/xray/xray_interface_internal.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_interface_internal.h?rev=280969&r1=280968&r2=280969&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_interface_internal.h (original)
+++ compiler-rt/trunk/lib/xray/xray_interface_internal.h Thu Sep  8 12:13:15 2016
@@ -16,30 +16,18 @@
 #define XRAY_INTERFACE_INTERNAL_H
 
 #include "xray/xray_interface.h"
-#include "sanitizer_common/sanitizer_platform.h"
 #include <cstddef>
 #include <cstdint>
 
 extern "C" {
 
 struct XRaySledEntry {
-#if SANITIZER_WORDSIZE == 64
   uint64_t Address;
   uint64_t Function;
   unsigned char Kind;
   unsigned char AlwaysInstrument;
   unsigned char Padding[14]; // Need 32 bytes
-#elif SANITIZER_WORDSIZE == 32
-  uint32_t Address;
-  uint32_t Function;
-  unsigned char Kind;
-  unsigned char AlwaysInstrument;
-  unsigned char Padding[6]; // Need 16 bytes
-#else
-	#error "Unsupported word size."
-#endif
 };
-
 }
 
 namespace __xray {
@@ -49,16 +37,6 @@ struct XRaySledMap {
   size_t Entries;
 };
 
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled);
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled);
-
 } // namespace __xray
 
-extern "C" {
-// The following functions have to be defined in assembler, on a per-platform
-// basis. See xray_trampoline_*.S files for implementations.
-extern void __xray_FunctionEntry();
-extern void __xray_FunctionExit();
-}
-
 #endif

Removed: compiler-rt/trunk/lib/xray/xray_trampoline_arm.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_trampoline_arm.S?rev=280968&view=auto
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_trampoline_arm.S (original)
+++ compiler-rt/trunk/lib/xray/xray_trampoline_arm.S (removed)
@@ -1,65 +0,0 @@
-    .syntax unified
-    .arch armv7
-    .fpu vfpv3
-    .code 32
-    .global _ZN6__xray19XRayPatchedFunctionE
-    @ Word-aligned function entry point
-    .p2align 2
-    @ Let C/C++ see the symbol
-    .global __xray_FunctionEntry
-    @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
-    @ Assume that "q" part of the floating-point registers is not used
-    @   for passing parameters to C/C++ functions.
-    .type __xray_FunctionEntry, %function
-    @ In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with
-    @   FuncId passed in r0 register.
-__xray_FunctionEntry:
-    PUSH {r1-r3,lr}
-    @ Save floating-point parameters of the instrumented function
-    VPUSH {d0-d7}
-    MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
-    MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
-    LDR r2, [r1]
-    @ Handler address is nullptr if handler is not set
-    CMP r2, #0
-    BEQ FunctionEntry_restore
-    @ Function ID is already in r0 (the first parameter).
-    @ r1=0 means that we are tracing an entry event
-    MOV r1, #0
-    @ Call the handler with 2 parameters in r0 and r1
-    BLX r2
-FunctionEntry_restore:
-    @ Restore floating-point parameters of the instrumented function
-    VPOP {d0-d7}
-    POP {r1-r3,pc}
-
-    @ Word-aligned function entry point
-    .p2align 2
-    @ Let C/C++ see the symbol
-	.global __xray_FunctionExit
-	@ Assume that d1-d7 are not used for the return value.
-    @ Assume that "q" part of the floating-point registers is not used for the
-    @   return value in C/C++.
-	.type __xray_FunctionExit, %function
-	@ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with
-    @   FuncId passed in r0 register.
-__xray_FunctionExit:
-    PUSH {r1-r3,lr}
-    @ Save the floating-point return value of the instrumented function
-    VPUSH {d0}
-    @ Load the handler address
-    MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
-    MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
-    LDR r2, [r1]
-    @ Handler address is nullptr if handler is not set
-    CMP r2, #0
-    BEQ FunctionExit_restore
-    @ Function ID is already in r0 (the first parameter).
-    @ 1 means that we are tracing an exit event
-    MOV r1, #1
-    @ Call the handler with 2 parameters in r0 and r1
-    BLX r2
-FunctionExit_restore:
-    @ Restore the floating-point return value of the instrumented function
-    VPOP {d0}
-    POP {r1-r3,pc}

Removed: compiler-rt/trunk/lib/xray/xray_x86_64.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_x86_64.cc?rev=280968&view=auto
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_x86_64.cc (original)
+++ compiler-rt/trunk/lib/xray/xray_x86_64.cc (removed)
@@ -1,116 +0,0 @@
-#include "xray_interface_internal.h"
-#include "sanitizer_common/sanitizer_common.h"
-#include <atomic>
-#include <cstdint>
-#include <limits>
-
-namespace __xray {
-
-static constexpr uint8_t CallOpCode = 0xe8;
-static constexpr uint16_t MovR10Seq = 0xba41;
-static constexpr uint16_t Jmp9Seq = 0x09eb;
-static constexpr uint8_t JmpOpCode = 0xe9;
-static constexpr uint8_t RetOpCode = 0xc3;
-
-static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
-static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
-
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled)
-{
-  // Here we do the dance of replacing the following sled:
-  //
-  // xray_sled_n:
-  //   jmp +9
-  //   <9 byte nop>
-  //
-  // With the following:
-  //
-  //   mov r10d, <function id>
-  //   call <relative 32bit offset to entry trampoline>
-  //
-  // We need to do this in the following order:
-  //
-  // 1. Put the function id first, 2 bytes from the start of the sled (just
-  // after the 2-byte jmp instruction).
-  // 2. Put the call opcode 6 bytes from the start of the sled.
-  // 3. Put the relative offset 7 bytes from the start of the sled.
-  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
-  // opcode and first operand.
-  //
-  // Prerequisite is to compute the relative offset to the
-  // __xray_FunctionEntry function's address.
-  int64_t TrampolineOffset =
-      reinterpret_cast<int64_t>(__xray_FunctionEntry) -
-      (static_cast<int64_t>(Sled.Address) + 11);
-  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
-    Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
-           "%ld\n",
-           __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address),
-           TrampolineOffset);
-    return false;
-  }
-  if (Enable) {
-    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
-    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
-    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
-    std::atomic_store_explicit(
-        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
-        std::memory_order_release);
-  } else {
-    std::atomic_store_explicit(
-        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
-        std::memory_order_release);
-    // FIXME: Write out the nops still?
-  }
-  return true;
-}
-
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled)
-{
-  // Here we do the dance of replacing the following sled:
-  //
-  // xray_sled_n:
-  //   ret
-  //   <10 byte nop>
-  //
-  // With the following:
-  //
-  //   mov r10d, <function id>
-  //   jmp <relative 32bit offset to exit trampoline>
-  //
-  // 1. Put the function id first, 2 bytes from the start of the sled (just
-  // after the 1-byte ret instruction).
-  // 2. Put the jmp opcode 6 bytes from the start of the sled.
-  // 3. Put the relative offset 7 bytes from the start of the sled.
-  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
-  // opcode and first operand.
-  //
-  // Prerequisite is to compute the relative offset fo the
-  // __xray_FunctionExit function's address.
-  int64_t TrampolineOffset =
-      reinterpret_cast<int64_t>(__xray_FunctionExit) -
-      (static_cast<int64_t>(Sled.Address) + 11);
-  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
-    Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
-           "%ld\n",
-           __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
-           TrampolineOffset);
-    return false;
-  }
-  if (Enable) {
-    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
-    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
-    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
-    std::atomic_store_explicit(
-        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
-        std::memory_order_release);
-  } else {
-    std::atomic_store_explicit(
-        reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
-        std::memory_order_release);
-    // FIXME: Write out the nops still?
-  }
-  return true;
-}
-
-} // namespace __xray




More information about the llvm-commits mailing list