[compiler-rt] [SystemZ][XRay] XRay runtime support for SystemZ (PR #113252)
Kai Nacke via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 24 06:00:52 PDT 2024
https://github.com/redstar updated https://github.com/llvm/llvm-project/pull/113252
>From 2ce5bddcc1a1050f86927efd825b59615fb930c1 Mon Sep 17 00:00:00 2001
From: Kai Nacke <kai.peter.nacke at ibm.com>
Date: Mon, 21 Oct 2024 22:24:46 -0400
Subject: [PATCH 1/3] [SystemZ][XRay] XRay runtime support for SystemZ
Adds the runtime support routines for XRay on SystemZ.
Only function entry/exit is implemented.
---
.../cmake/Modules/AllSupportedArchDefs.cmake | 2 +-
compiler-rt/lib/xray/CMakeLists.txt | 7 ++
compiler-rt/lib/xray/xray_interface.cpp | 2 +
compiler-rt/lib/xray/xray_s390x.cpp | 90 ++++++++++++++
compiler-rt/lib/xray/xray_trampoline_s390x.S | 111 ++++++++++++++++++
compiler-rt/lib/xray/xray_tsc.h | 22 ++++
6 files changed, 233 insertions(+), 1 deletion(-)
create mode 100644 compiler-rt/lib/xray/xray_s390x.cpp
create mode 100644 compiler-rt/lib/xray/xray_trampoline_s390x.S
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index 809e9277156912..bd51db766faab9 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -102,7 +102,7 @@ if(APPLE)
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64})
else()
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
- powerpc64le ${HEXAGON} ${LOONGARCH64})
+ powerpc64le ${HEXAGON} ${LOONGARCH64} ${S390X})
endif()
set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index cf7b5062aae32d..8e1ef2db2873a8 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -83,6 +83,11 @@ set(hexagon_SOURCES
xray_trampoline_hexagon.S
)
+set(s390x_SOURCES
+ xray_s390x.cpp
+ xray_trampoline_s390x.S
+ )
+
set(XRAY_SOURCE_ARCHS
arm
armhf
@@ -93,6 +98,7 @@ set(XRAY_SOURCE_ARCHS
mips64
mips64el
powerpc64le
+ s390x
x86_64
)
@@ -141,6 +147,7 @@ set(XRAY_ALL_SOURCE_FILES
${mips64_SOURCES}
${mips64el_SOURCES}
${powerpc64le_SOURCES}
+ ${s390x_SOURCES}
${XRAY_IMPL_HEADERS}
)
list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES)
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 5839043fcb93a8..fee7dccf695ca0 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -56,6 +56,8 @@ static const int16_t cSledLength = 64;
static const int16_t cSledLength = 8;
#elif defined(__hexagon__)
static const int16_t cSledLength = 20;
+#elif defined(__s390x__)
+static const int16_t cSledLength = 18;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */
diff --git a/compiler-rt/lib/xray/xray_s390x.cpp b/compiler-rt/lib/xray/xray_s390x.cpp
new file mode 100644
index 00000000000000..135994e54a2d7a
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_s390x.cpp
@@ -0,0 +1,90 @@
+//===-- xray_s390x.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of s390x routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <cassert>
+#include <cstring>
+
+namespace __xray {
+
+bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ const uint64_t Address = Sled.address();
+ if (Enable) {
+ // The resulting code is:
+ // stmg %r2, %r15, 16(%r15)
+ // llilf %r2, FuncID
+ // brasl %r14, __xray_FunctionEntry@GOT
+ // The FuncId and the stmg instruction must be written.
+
+ // Write FuncId into llilf.
+ reinterpret_cast<uint32_t *>(Address)[2] = FuncId;
+ // Write last part of stmg.
+ reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
+ // Write first part of stmg.
+ reinterpret_cast<uint32_t *>(Address)[0] = 0xeb2ff010;
+ } else {
+ // j +16 instructions.
+ *reinterpret_cast<uint32_t *>(Address) = 0xa7f4000b;
+ }
+ return true;
+}
+
+bool patchFunctionExit(const bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ const uint64_t Address = Sled.address();
+ if (Enable) {
+ // The resulting code is:
+ // stmg %r2, %r15, 24(%r15)
+ // llilf %r2,FuncID
+ // j __xray_FunctionExit@GOT
+ // The FuncId and the stmg instruction must be written.
+
+ // Write FuncId into llilf.
+ reinterpret_cast<uint32_t *>(Address)[2] = FuncId;
+ // Write last part of of stmg.
+ reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
+ // Write first part of stmg.
+ reinterpret_cast<uint32_t *>(Address)[0] = 0xeb2ff010;
+ } else {
+ // br %14 instruction.
+ *reinterpret_cast<uint16_t *>(Address) = 0x07fe;
+ }
+ return true;
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchFunctionExit(Enable, FuncId, Sled);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement.
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement.
+ return false;
+}
+
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file.
+}
diff --git a/compiler-rt/lib/xray/xray_trampoline_s390x.S b/compiler-rt/lib/xray/xray_trampoline_s390x.S
new file mode 100644
index 00000000000000..49a290ceb11f0a
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_trampoline_s390x.S
@@ -0,0 +1,111 @@
+//===-- xray_trampoline_s390x.s ---------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the s390x-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+
+#if __VX__
+// Minimal stack frame size (160) plus space for 8 vector registers a 16 bytes.
+#define STACKSZ 288
+#else
+// Minimal stack frame size
+#define STACKSZ 160
+#endif
+
+//===----------------------------------------------------------------------===//
+
+ .globl __xray_FunctionEntry
+ .p2align 4
+ .type __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+ # The registers r2-15 of the instrumented function are already saved in the
+ # stack frame. On entry, r2 contains the function id, and %r14 the address
+ # of the first instruction of the instrumented function.
+ # Register r14 will be stored in the slot reserved for compiler use.
+ stg %r14, 8(%r15)
+ std %f0, 128(%r15)
+ std %f2, 136(%r15)
+ std %f4, 144(%r15)
+ std %f6, 152(%r15)
+ aghi %r15, -STACKSZ
+#if __VX__
+ vstm %v24, %v31, 160(%r15)
+#endif
+
+ lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+ ltg %r1, 0(%r1)
+ je .Lrestore1
+
+ # Set r3 to XRayEntryType::ENTRY = 0.
+ # The FuncId is still stored in r2.
+ lghi %r3, 0
+ basr %r14, %r1
+
+.Lrestore1:
+#if __VX__
+ vlm %v24, %v31, 160(%r15)
+#endif
+ ld %f6, STACKSZ+152(%r15)
+ ld %f4, STACKSZ+144(%r15)
+ ld %f2, STACKSZ+136(%r15)
+ ld %f0, STACKSZ+128(%r15)
+ lmg %r1, %r15, STACKSZ+8(%r15)
+ br %r1
+.Lfunc_end0:
+ .size __xray_FunctionEntry, .Lfunc_end0-__xray_FunctionEntry
+
+//===----------------------------------------------------------------------===//
+
+ .globl __xray_FunctionExit
+ .p2align 4
+ .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+ # The registers r2-15 of the instrumented function are already saved in the
+ # stack frame. On entry, the register r2 contains the function id.
+ # At the end, the function jumps to the address saved in the slot for r14,
+ # which contains the return address into the caller of the instrumented
+ # function.
+ std %f0, 128(%r15)
+ std %f2, 136(%r15)
+ std %f4, 144(%r15)
+ std %f6, 152(%r15)
+ aghi %r15, -STACKSZ
+#if __VX__
+ vstm %v24, %v31, 160(%r15)
+#endif
+
+ lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+ ltg %r1, 0(%r1)
+ je .Lrestore2
+
+ # Set r3 to XRayEntryType::EXIT = 1.
+ # The FuncId is still stored in r2.
+ lghi %r3, 1
+ basr %r14, %r1
+
+.Lrestore2:
+#if __VX__
+ vlm %v24, %v31, 160(%r15)
+#endif
+ ld %f6, STACKSZ+152(%r15)
+ ld %f4, STACKSZ+144(%r15)
+ ld %f2, STACKSZ+136(%r15)
+ ld %f0, STACKSZ+128(%r15)
+ lmg %r2, %r15, STACKSZ+16(%r15)
+ br %r14
+.Lfunc_end1:
+ .size __xray_FunctionExit, .Lfunc_end1-__xray_FunctionExit
+
+//===----------------------------------------------------------------------===//
+
+ .section ".note.GNU-stack","",@progbits
diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h
index e1cafe1bf11d2d..5b2ba49b85f5d6 100644
--- a/compiler-rt/lib/xray/xray_tsc.h
+++ b/compiler-rt/lib/xray/xray_tsc.h
@@ -83,6 +83,28 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
} // namespace __xray
+#elif defined(__s390x__)
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+#include <cerrno>
+#include <cstdint>
+#include <time.h>
+
+namespace __xray {
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ return __builtin_readcyclecounter();
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ return NanosecondsPerSecond;
+}
+
+} // namespace __xray
+
#else
#error Target architecture is not supported.
#endif // CPU architecture
>From a6accc5dc26b76efec59d437c8b1fb2386488dc4 Mon Sep 17 00:00:00 2001
From: Kai Nacke <kai.peter.nacke at ibm.com>
Date: Tue, 22 Oct 2024 13:19:16 -0400
Subject: [PATCH 2/3] Changes based on reviewer comments
---
compiler-rt/lib/xray/xray_s390x.cpp | 34 ++++++++++++++---------------
1 file changed, 16 insertions(+), 18 deletions(-)
diff --git a/compiler-rt/lib/xray/xray_s390x.cpp b/compiler-rt/lib/xray/xray_s390x.cpp
index 135994e54a2d7a..ded7aa1f23d817 100644
--- a/compiler-rt/lib/xray/xray_s390x.cpp
+++ b/compiler-rt/lib/xray/xray_s390x.cpp
@@ -17,11 +17,9 @@
#include <cassert>
#include <cstring>
-namespace __xray {
-
-bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
- const XRaySledEntry &Sled,
- void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+bool __xray::patchFunctionEntry(const bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
const uint64_t Address = Sled.address();
if (Enable) {
// The resulting code is:
@@ -43,8 +41,9 @@ bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
return true;
}
-bool patchFunctionExit(const bool Enable, uint32_t FuncId,
- const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+bool __xray::patchFunctionExit(const bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled)
+ XRAY_NEVER_INSTRUMENT {
const uint64_t Address = Sled.address();
if (Enable) {
// The resulting code is:
@@ -66,25 +65,24 @@ bool patchFunctionExit(const bool Enable, uint32_t FuncId,
return true;
}
-bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+bool __xray::patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled)
+ XRAY_NEVER_INSTRUMENT {
return patchFunctionExit(Enable, FuncId, Sled);
}
-bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
- // FIXME: Implement.
+bool __xray::patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // TODO Implement.
return false;
}
-bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
- // FIXME: Implement.
+bool __xray::patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // TODO Implement.
return false;
}
-} // namespace __xray
-
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
- // FIXME: this will have to be implemented in the trampoline assembly file.
+ // TODO this will have to be implemented in the trampoline assembly file.
}
>From 6d216738262e5a243d26b6398935cbfa0e411eb7 Mon Sep 17 00:00:00 2001
From: Kai Nacke <kai.peter.nacke at ibm.com>
Date: Thu, 24 Oct 2024 09:00:34 -0400
Subject: [PATCH 3/3] Address reviewer comments
- Simplify patching of sled
- Introduce vector version of the trampoline
---
compiler-rt/lib/xray/CMakeLists.txt | 2 +
.../lib/xray/xray_interface_internal.h | 4 +
compiler-rt/lib/xray/xray_s390x.cpp | 16 +--
compiler-rt/lib/xray/xray_trampoline_s390x.S | 107 ++++++++++++++----
4 files changed, 100 insertions(+), 29 deletions(-)
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index 8e1ef2db2873a8..dc2bb67e09781f 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -87,6 +87,8 @@ set(s390x_SOURCES
xray_s390x.cpp
xray_trampoline_s390x.S
)
+# Enable vector instructions in the assembly file.
+set_source_files_properties(xray_trampoline_s390x.S PROPERTIES COMPILE_FLAGS -mvx)
set(XRAY_SOURCE_ARCHS
arm
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
index 80c07c167f6461..ac61bc696b5222 100644
--- a/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/compiler-rt/lib/xray/xray_interface_internal.h
@@ -97,6 +97,10 @@ extern void __xray_FunctionTailExit();
extern void __xray_ArgLoggerEntry();
extern void __xray_CustomEvent();
extern void __xray_TypedEvent();
+#if defined(__s390x__)
+extern void __xray_FunctionEntryVec();
+extern void __xray_FunctionExitVec();
+#endif
}
#endif
diff --git a/compiler-rt/lib/xray/xray_s390x.cpp b/compiler-rt/lib/xray/xray_s390x.cpp
index ded7aa1f23d817..86ef3573ce5092 100644
--- a/compiler-rt/lib/xray/xray_s390x.cpp
+++ b/compiler-rt/lib/xray/xray_s390x.cpp
@@ -20,7 +20,7 @@
bool __xray::patchFunctionEntry(const bool Enable, uint32_t FuncId,
const XRaySledEntry &Sled,
void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
- const uint64_t Address = Sled.address();
+ uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
if (Enable) {
// The resulting code is:
// stmg %r2, %r15, 16(%r15)
@@ -29,14 +29,14 @@ bool __xray::patchFunctionEntry(const bool Enable, uint32_t FuncId,
// The FuncId and the stmg instruction must be written.
// Write FuncId into llilf.
- reinterpret_cast<uint32_t *>(Address)[2] = FuncId;
+ Address[2] = FuncId;
// Write last part of stmg.
reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
// Write first part of stmg.
- reinterpret_cast<uint32_t *>(Address)[0] = 0xeb2ff010;
+ Address[0] = 0xeb2ff010;
} else {
// j +16 instructions.
- *reinterpret_cast<uint32_t *>(Address) = 0xa7f4000b;
+ Address[0] = 0xa7f4000b;
}
return true;
}
@@ -44,7 +44,7 @@ bool __xray::patchFunctionEntry(const bool Enable, uint32_t FuncId,
bool __xray::patchFunctionExit(const bool Enable, uint32_t FuncId,
const XRaySledEntry &Sled)
XRAY_NEVER_INSTRUMENT {
- const uint64_t Address = Sled.address();
+ uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
if (Enable) {
// The resulting code is:
// stmg %r2, %r15, 24(%r15)
@@ -53,14 +53,14 @@ bool __xray::patchFunctionExit(const bool Enable, uint32_t FuncId,
// The FuncId and the stmg instruction must be written.
// Write FuncId into llilf.
- reinterpret_cast<uint32_t *>(Address)[2] = FuncId;
+ Address[2] = FuncId;
// Write last part of of stmg.
reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
// Write first part of stmg.
- reinterpret_cast<uint32_t *>(Address)[0] = 0xeb2ff010;
+ Address[0] = 0xeb2ff010;
} else {
// br %14 instruction.
- *reinterpret_cast<uint16_t *>(Address) = 0x07fe;
+ reinterpret_cast<uint16_t *>(Address)[0] = 0x07fe;
}
return true;
}
diff --git a/compiler-rt/lib/xray/xray_trampoline_s390x.S b/compiler-rt/lib/xray/xray_trampoline_s390x.S
index 49a290ceb11f0a..4073943641b999 100644
--- a/compiler-rt/lib/xray/xray_trampoline_s390x.S
+++ b/compiler-rt/lib/xray/xray_trampoline_s390x.S
@@ -9,18 +9,19 @@
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the s390x-specific assembler for the trampolines.
+// 2 versions of the functions are provided: one which does not store the
+// vector registers, and one which does store them. The compiler decides
+// which to call based on the availability of the vector extension.
//
//===----------------------------------------------------------------------===//
.text
-#if __VX__
-// Minimal stack frame size (160) plus space for 8 vector registers a 16 bytes.
-#define STACKSZ 288
-#else
// Minimal stack frame size
#define STACKSZ 160
-#endif
+
+// Minimal stack frame size (160) plus space for 8 vector registers of 16 bytes each.
+#define STACKSZ_VEC 288
//===----------------------------------------------------------------------===//
@@ -38,23 +39,17 @@ __xray_FunctionEntry:
std %f4, 144(%r15)
std %f6, 152(%r15)
aghi %r15, -STACKSZ
-#if __VX__
- vstm %v24, %v31, 160(%r15)
-#endif
lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
ltg %r1, 0(%r1)
- je .Lrestore1
+ je .Lrestore0
# Set r3 to XRayEntryType::ENTRY = 0.
# The FuncId is still stored in r2.
lghi %r3, 0
basr %r14, %r1
-.Lrestore1:
-#if __VX__
- vlm %v24, %v31, 160(%r15)
-#endif
+.Lrestore0:
ld %f6, STACKSZ+152(%r15)
ld %f4, STACKSZ+144(%r15)
ld %f2, STACKSZ+136(%r15)
@@ -64,6 +59,44 @@ __xray_FunctionEntry:
.Lfunc_end0:
.size __xray_FunctionEntry, .Lfunc_end0-__xray_FunctionEntry
+//===----------------------------------------------------------------------===//
+
+ .globl __xray_FunctionEntryVec
+ .p2align 4
+ .type __xray_FunctionEntryVec,@function
+__xray_FunctionEntryVec:
+ # The registers r2-15 of the instrumented function are already saved in the
+ # stack frame. On entry, r2 contains the function id, and %r14 the address
+ # of the first instruction of the instrumented function.
+ # Register r14 will be stored in the slot reserved for compiler use.
+ stg %r14, 8(%r15)
+ std %f0, 128(%r15)
+ std %f2, 136(%r15)
+ std %f4, 144(%r15)
+ std %f6, 152(%r15)
+ aghi %r15, -STACKSZ_VEC
+ vstm %v24, %v31, 160(%r15)
+
+ lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+ ltg %r1, 0(%r1)
+ je .Lrestore1
+
+ # Set r3 to XRayEntryType::ENTRY = 0.
+ # The FuncId is still stored in r2.
+ lghi %r3, 0
+ basr %r14, %r1
+
+.Lrestore1:
+ vlm %v24, %v31, 160(%r15)
+ ld %f6, STACKSZ_VEC+152(%r15)
+ ld %f4, STACKSZ_VEC+144(%r15)
+ ld %f2, STACKSZ_VEC+136(%r15)
+ ld %f0, STACKSZ_VEC+128(%r15)
+ lmg %r1, %r15, STACKSZ_VEC+8(%r15)
+ br %r1
+.Lfunc_end1:
+ .size __xray_FunctionEntryVec, .Lfunc_end1-__xray_FunctionEntryVec
+
//===----------------------------------------------------------------------===//
.globl __xray_FunctionExit
@@ -80,9 +113,6 @@ __xray_FunctionExit:
std %f4, 144(%r15)
std %f6, 152(%r15)
aghi %r15, -STACKSZ
-#if __VX__
- vstm %v24, %v31, 160(%r15)
-#endif
lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
ltg %r1, 0(%r1)
@@ -94,17 +124,52 @@ __xray_FunctionExit:
basr %r14, %r1
.Lrestore2:
-#if __VX__
- vlm %v24, %v31, 160(%r15)
-#endif
ld %f6, STACKSZ+152(%r15)
ld %f4, STACKSZ+144(%r15)
ld %f2, STACKSZ+136(%r15)
ld %f0, STACKSZ+128(%r15)
lmg %r2, %r15, STACKSZ+16(%r15)
br %r14
-.Lfunc_end1:
- .size __xray_FunctionExit, .Lfunc_end1-__xray_FunctionExit
+.Lfunc_end2:
+ .size __xray_FunctionExit, .Lfunc_end2-__xray_FunctionExit
+
+//===----------------------------------------------------------------------===//
+
+ .globl __xray_FunctionExitVec
+ .p2align 4
+ .type __xray_FunctionExitVec,@function
+__xray_FunctionExitVec:
+ # The registers r2-15 of the instrumented function are already saved in the
+ # stack frame. On entry, the register r2 contains the function id.
+ # At the end, the function jumps to the address saved in the slot for r14,
+ # which contains the return address into the caller of the instrumented
+ # function.
+ std %f0, 128(%r15)
+ std %f2, 136(%r15)
+ std %f4, 144(%r15)
+ std %f6, 152(%r15)
+ aghi %r15, -STACKSZ_VEC
+ vstm %v24, %v31, 160(%r15)
+
+ lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+ ltg %r1, 0(%r1)
+ je .Lrestore3
+
+ # Set r3 to XRayEntryType::EXIT = 1.
+ # The FuncId is still stored in r2.
+ lghi %r3, 1
+ basr %r14, %r1
+
+.Lrestore3:
+ vlm %v24, %v31, 160(%r15)
+ ld %f6, STACKSZ_VEC+152(%r15)
+ ld %f4, STACKSZ_VEC+144(%r15)
+ ld %f2, STACKSZ_VEC+136(%r15)
+ ld %f0, STACKSZ_VEC+128(%r15)
+ lmg %r2, %r15, STACKSZ_VEC+16(%r15)
+ br %r14
+.Lfunc_end3:
+ .size __xray_FunctionExitVec, .Lfunc_end3-__xray_FunctionExitVec
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list