[compiler-rt] [SystemZ][XRay] Reland XRay runtime support for SystemZ (PR #124611)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 27 11:08:02 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-xray

Author: Kai Nacke (redstar)

<details>
<summary>Changes</summary>

Adds the runtime support routines for XRay on SystemZ. Only function entry/exit is implemented.

The original PR 113252 was reverted due to errors caused by adding DSO support to XRay.
This PR is the original implementation with the changed function signatures. I'll add an
implementation with DSO support later.

---
Full diff: https://github.com/llvm/llvm-project/pull/124611.diff


7 Files Affected:

- (modified) compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake (+1-1) 
- (modified) compiler-rt/lib/xray/CMakeLists.txt (+9) 
- (modified) compiler-rt/lib/xray/xray_interface.cpp (+2) 
- (modified) compiler-rt/lib/xray/xray_interface_internal.h (+4) 
- (added) compiler-rt/lib/xray/xray_s390x.cpp (+104) 
- (added) compiler-rt/lib/xray/xray_trampoline_s390x.S (+176) 
- (modified) compiler-rt/lib/xray/xray_tsc.h (+22) 


``````````diff
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index ee5be276f3df7d..2683259e93e371 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -103,7 +103,7 @@ if(APPLE)
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64})
 else()
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
-               powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64})
+    powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64} ${S390X})
 endif()
 set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64} ${ARM64})
 set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index e7f01a2f4f1640..673091807e348d 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -106,6 +106,13 @@ set(riscv64_SOURCES
   xray_trampoline_riscv64.S
   )
 
+  set(s390x_SOURCES
+  xray_s390x.cpp
+  xray_trampoline_s390x.S
+  )
+# Enable vector instructions in the assembly file.
+set_source_files_properties(xray_trampoline_s390x.S PROPERTIES COMPILE_FLAGS -mvx)
+
 set(XRAY_SOURCE_ARCHS
   arm
   armhf
@@ -116,6 +123,7 @@ set(XRAY_SOURCE_ARCHS
   mips64
   mips64el
   powerpc64le
+  s390x
   x86_64
   )
 
@@ -168,6 +176,7 @@ set(XRAY_ALL_SOURCE_FILES
   ${powerpc64le_SOURCES}
   ${riscv32_SOURCES}
   ${riscv64_SOURCES}
+  ${s390x_SOURCES}
   ${XRAY_IMPL_HEADERS}
   )
 list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES)
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 4ec492c266d809..3f97827874a700 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -61,6 +61,8 @@ static const int16_t cSledLength = 20;
 static const int16_t cSledLength = 68;
 #elif defined(__riscv) && (__riscv_xlen == 32)
 static const int16_t cSledLength = 52;
+#elif defined(__s390x__)
+static const int16_t cSledLength = 18;
 #else
 #error "Unsupported CPU Architecture"
 #endif /* CPU architecture */
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index a8cfe0fde84dd2..5dcccfe825cf59 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -29,6 +29,10 @@ extern void __xray_FunctionTailExit();
 extern void __xray_ArgLoggerEntry();
 extern void __xray_CustomEvent();
 extern void __xray_TypedEvent();
+#if defined(__s390x__)
+extern void __xray_FunctionEntryVec();
+extern void __xray_FunctionExitVec();
+#endif
 }
 
 extern "C" {
diff --git a/compiler-rt/lib/xray/xray_s390x.cpp b/compiler-rt/lib/xray/xray_s390x.cpp
new file mode 100644
index 00000000000000..599485435671bf
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_s390x.cpp
@@ -0,0 +1,104 @@
+//===-- xray_s390x.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of s390x routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <cassert>
+#include <cstring>
+
+bool __xray::patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                                const XRaySledEntry &Sled,
+                                const XRayTrampolines &Trampolines,
+                                bool LogArgs) XRAY_NEVER_INSTRUMENT {
+  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+  // TODO: Trampoline addresses are currently inserted at compile-time, using
+  //       __xray_FunctionEntry and __xray_FunctionExit only.
+  //       To support DSO instrumentation, trampolines have to be written during
+  //       patching (see implementation on X86_64, e.g.).
+  if (Enable) {
+    // Enabling rewrites the start of the sled so that it reads:
+    //   stmg    %r2, %r15, 16(%r15)
+    //   llilf   %r2, FuncID
+    //   brasl   %r14, __xray_FunctionEntry@GOT
+    // The FuncId and the stmg instruction must be written.
+
+    // Write FuncId into llilf.
+    Address[2] = FuncId;
+    // Write last part of stmg.
+    reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
+    // Write first part of stmg, overwriting the j that disables the sled.
+    Address[0] = 0xeb2ff010;
+  } else {
+    // j +16 instructions.
+    Address[0] = 0xa7f4000b;
+  }
+  return true;
+}
+
+bool __xray::patchFunctionExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+  // TODO: Trampoline addresses are currently inserted at compile-time, using
+  //       __xray_FunctionEntry and __xray_FunctionExit only.
+  //       To support DSO instrumentation, trampolines have to be written during
+  //       patching (see implementation on X86_64, e.g.).
+  if (Enable) {
+    // Enabling rewrites the start of the sled so that it reads:
+    //   stmg    %r2, %r15, 16(%r15)
+    //   llilf   %r2, FuncID
+    //   j       __xray_FunctionExit@GOT
+    // The FuncId and the stmg instruction must be written.
+
+    // Write FuncId into llilf.
+    Address[2] = FuncId;
+    // Write last part of stmg.
+    reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
+    // Write first part of stmg, overwriting the br that disables the sled.
+    Address[0] = 0xeb2ff010;
+  } else {
+    // br %r14 instruction.
+    reinterpret_cast<uint16_t *>(Address)[0] = 0x07fe;
+  }
+  return true;
+}
+
+bool __xray::patchFunctionTailExit(
+    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+  return patchFunctionExit(Enable, FuncId, Sled, Trampolines);
+}
+
+bool __xray::patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                              const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // TODO: Implement. Until then nothing is patched and false is returned.
+  return false;
+}
+
+bool __xray::patchTypedEvent(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // TODO: Implement. Until then nothing is patched and false is returned.
+  return false;
+}
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+  // TODO: Empty stub; must be implemented in the trampoline assembly file.
+}
+
+extern "C" void __xray_FunctionTailExit() XRAY_NEVER_INSTRUMENT {
+  // On s390x, calls to __xray_FunctionEntry and __xray_FunctionExit
+  // are statically inserted into the sled. Tail exits are handled like normal
+  // function exits. This trampoline is therefore not implemented.
+  // This stub is placed here to avoid linking issues.
+}
diff --git a/compiler-rt/lib/xray/xray_trampoline_s390x.S b/compiler-rt/lib/xray/xray_trampoline_s390x.S
new file mode 100644
index 00000000000000..4073943641b999
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_trampoline_s390x.S
@@ -0,0 +1,176 @@
+//===-- xray_trampoline_s390x.S ---------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the s390x-specific assembler for the trampolines.
+// 2 versions of the functions are provided: one which does not store the
+// vector registers, and one which does store them. The compiler decides
+// which to call based on the availability of the vector extension.
+//
+//===----------------------------------------------------------------------===//
+
+    .text
+
+// Minimal stack frame size
+#define STACKSZ  160
+
+// Minimal stack frame size (160) plus 8 vector registers of 16 bytes each.
+#define STACKSZ_VEC  288
+
+//===----------------------------------------------------------------------===//
+
+    .globl  __xray_FunctionEntry
+    .p2align    4
+    .type   __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+    # The registers r2-15 of the instrumented function are already saved in the
+    # stack frame. On entry, r2 contains the function id, and %r14 the address
+    # of the first instruction of the instrumented function.
+    # Register r14 will be stored in the slot reserved for compiler use.
+    stg     %r14, 8(%r15)
+    std     %f0, 128(%r15)
+    std     %f2, 136(%r15)
+    std     %f4, 144(%r15)
+    std     %f6, 152(%r15)
+    aghi    %r15, -STACKSZ
+
+    lgrl    %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+    ltg     %r1, 0(%r1)         # Load the XRayPatchedFunction value, test it.
+    je      .Lrestore0          # Zero: skip the call.
+
+    # Set r3 to XRayEntryType::ENTRY = 0.
+    # The FuncId is still stored in r2.
+    lghi    %r3, 0
+    basr    %r14, %r1
+
+.Lrestore0:
+    ld      %f6, STACKSZ+152(%r15)
+    ld      %f4, STACKSZ+144(%r15)
+    ld      %f2, STACKSZ+136(%r15)
+    ld      %f0, STACKSZ+128(%r15)
+    lmg     %r1, %r15, STACKSZ+8(%r15)      # r1 = the r14 value saved above.
+    br      %r1
+.Lfunc_end0:
+    .size    __xray_FunctionEntry, .Lfunc_end0-__xray_FunctionEntry
+
+//===----------------------------------------------------------------------===//
+
+    .globl  __xray_FunctionEntryVec
+    .p2align    4
+    .type   __xray_FunctionEntryVec,@function
+__xray_FunctionEntryVec:
+    # The registers r2-15 of the instrumented function are already saved in the
+    # stack frame. On entry, r2 contains the function id, and %r14 the address
+    # of the first instruction of the instrumented function.
+    # Register r14 will be stored in the slot reserved for compiler use.
+    stg     %r14, 8(%r15)
+    std     %f0, 128(%r15)
+    std     %f2, 136(%r15)
+    std     %f4, 144(%r15)
+    std     %f6, 152(%r15)
+    aghi    %r15, -STACKSZ_VEC
+    vstm    %v24, %v31, 160(%r15)   # Save v24-v31 above the 160-byte frame.
+
+    lgrl    %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+    ltg     %r1, 0(%r1)         # Load the XRayPatchedFunction value, test it.
+    je      .Lrestore1          # Zero: skip the call.
+
+    # Set r3 to XRayEntryType::ENTRY = 0.
+    # The FuncId is still stored in r2.
+    lghi    %r3, 0
+    basr    %r14, %r1
+
+.Lrestore1:
+    vlm     %v24, %v31, 160(%r15)
+    ld      %f6, STACKSZ_VEC+152(%r15)
+    ld      %f4, STACKSZ_VEC+144(%r15)
+    ld      %f2, STACKSZ_VEC+136(%r15)
+    ld      %f0, STACKSZ_VEC+128(%r15)
+    lmg     %r1, %r15, STACKSZ_VEC+8(%r15)  # r1 = the r14 value saved above.
+    br      %r1
+.Lfunc_end1:
+    .size    __xray_FunctionEntryVec, .Lfunc_end1-__xray_FunctionEntryVec
+
+//===----------------------------------------------------------------------===//
+
+    .globl  __xray_FunctionExit
+    .p2align    4
+    .type   __xray_FunctionExit,@function
+__xray_FunctionExit:
+    # The registers r2-15 of the instrumented function are already saved in the
+    # stack frame. On entry, the register r2 contains the function id.
+    # At the end, the function jumps to the address saved in the slot for r14,
+    # which contains the return address into the caller of the instrumented
+    # function.
+    std     %f0, 128(%r15)
+    std     %f2, 136(%r15)
+    std     %f4, 144(%r15)
+    std     %f6, 152(%r15)
+    aghi    %r15, -STACKSZ
+
+    lgrl    %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+    ltg     %r1, 0(%r1)         # Load the XRayPatchedFunction value, test it.
+    je      .Lrestore2          # Zero: skip the call.
+
+    # Set r3 to XRayEntryType::EXIT = 1.
+    # The FuncId is still stored in r2.
+    lghi    %r3, 1
+    basr    %r14, %r1
+
+.Lrestore2:
+    ld      %f6, STACKSZ+152(%r15)
+    ld      %f4, STACKSZ+144(%r15)
+    ld      %f2, STACKSZ+136(%r15)
+    ld      %f0, STACKSZ+128(%r15)
+    lmg     %r2, %r15, STACKSZ+16(%r15)
+    br      %r14
+.Lfunc_end2:
+    .size    __xray_FunctionExit, .Lfunc_end2-__xray_FunctionExit
+
+//===----------------------------------------------------------------------===//
+
+    .globl  __xray_FunctionExitVec
+    .p2align    4
+    .type   __xray_FunctionExitVec,@function
+__xray_FunctionExitVec:
+    # The registers r2-15 of the instrumented function are already saved in the
+    # stack frame. On entry, the register r2 contains the function id.
+    # At the end, the function jumps to the address saved in the slot for r14,
+    # which contains the return address into the caller of the instrumented
+    # function.
+    std     %f0, 128(%r15)
+    std     %f2, 136(%r15)
+    std     %f4, 144(%r15)
+    std     %f6, 152(%r15)
+    aghi    %r15, -STACKSZ_VEC
+    vstm    %v24, %v31, 160(%r15)   # Save v24-v31 above the 160-byte frame.
+
+    lgrl    %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+    ltg     %r1, 0(%r1)         # Load the XRayPatchedFunction value, test it.
+    je      .Lrestore3          # Zero: skip the call.
+
+    # Set r3 to XRayEntryType::EXIT = 1.
+    # The FuncId is still stored in r2.
+    lghi    %r3, 1
+    basr    %r14, %r1
+
+.Lrestore3:
+    vlm     %v24, %v31, 160(%r15)
+    ld      %f6, STACKSZ_VEC+152(%r15)
+    ld      %f4, STACKSZ_VEC+144(%r15)
+    ld      %f2, STACKSZ_VEC+136(%r15)
+    ld      %f0, STACKSZ_VEC+128(%r15)
+    lmg     %r2, %r15, STACKSZ_VEC+16(%r15)
+    br      %r14
+.Lfunc_end3:
+    .size    __xray_FunctionExitVec, .Lfunc_end3-__xray_FunctionExitVec
+
+//===----------------------------------------------------------------------===//
+
+    .section    ".note.GNU-stack","",@progbits
diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h
index b62a686d6ce0f2..17e06c7035d85c 100644
--- a/compiler-rt/lib/xray/xray_tsc.h
+++ b/compiler-rt/lib/xray/xray_tsc.h
@@ -83,6 +83,28 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
 
 } // namespace __xray
 
+#elif defined(__s390x__)
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+#include <cerrno>
+#include <cstdint>
+#include <time.h>
+
+namespace __xray {
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  return __builtin_readcyclecounter();
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+  return NanosecondsPerSecond;
+}
+
+} // namespace __xray
+
 #else
 #error Target architecture is not supported.
 #endif // CPU architecture

``````````

</details>


https://github.com/llvm/llvm-project/pull/124611


More information about the llvm-commits mailing list