[compiler-rt] ea76b2d - [XRay][RISCV] RISCV support for XRay (#117368)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 17:57:08 PST 2024
Author: Min-Yih Hsu
Date: 2024-12-10T17:57:04-08:00
New Revision: ea76b2d8d83d6885bf5707832cbc4b7655e21b08
URL: https://github.com/llvm/llvm-project/commit/ea76b2d8d83d6885bf5707832cbc4b7655e21b08
DIFF: https://github.com/llvm/llvm-project/commit/ea76b2d8d83d6885bf5707832cbc4b7655e21b08.diff
LOG: [XRay][RISCV] RISCV support for XRay (#117368)
Add RISC-V support for XRay. The RV64 implementation has been tested in
both QEMU and in our hardware environment.
Currently this requires D and C extensions, but since both RV64GC and
RVA22/RVA23 are becoming mainstream, I don't think this requirement will
be a big problem.
Based on the previous work by @a-poduval :
https://reviews.llvm.org/D117929
---------
Co-authored-by: Ashwin Poduval <ashwin.poduval at gmail.com>
Added:
compiler-rt/lib/xray/xray_riscv.cpp
compiler-rt/lib/xray/xray_trampoline_riscv32.S
compiler-rt/lib/xray/xray_trampoline_riscv64.S
compiler-rt/lib/xray/xray_trampoline_riscv_common.S
llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll
Modified:
clang/lib/Driver/XRayArgs.cpp
compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
compiler-rt/lib/xray/CMakeLists.txt
compiler-rt/lib/xray/xray_interface.cpp
compiler-rt/lib/xray/xray_tsc.h
llvm/lib/CodeGen/XRayInstrumentation.cpp
llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVSubtarget.h
llvm/lib/XRay/InstrumentationMap.cpp
Removed:
################################################################################
diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp
index de5c38ebc3abbd..f8c213334a2b40 100644
--- a/clang/lib/Driver/XRayArgs.cpp
+++ b/clang/lib/Driver/XRayArgs.cpp
@@ -51,6 +51,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::systemz:
+ case llvm::Triple::riscv32:
+ case llvm::Triple::riscv64:
break;
default:
D.Diag(diag::err_drv_unsupported_opt_for_target)
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index b29ae179c2b4f4..5a1e8db61023b0 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -102,7 +102,7 @@ if(APPLE)
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64})
else()
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
- powerpc64le ${HEXAGON} ${LOONGARCH64})
+ powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64})
endif()
set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64} ${ARM64})
set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index 7e3f1a0aa616e5..e7f01a2f4f1640 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -96,6 +96,16 @@ set(hexagon_SOURCES
xray_trampoline_hexagon.S
)
+set(riscv32_SOURCES
+ xray_riscv.cpp
+ xray_trampoline_riscv32.S
+ )
+
+set(riscv64_SOURCES
+ xray_riscv.cpp
+ xray_trampoline_riscv64.S
+ )
+
set(XRAY_SOURCE_ARCHS
arm
armhf
@@ -156,6 +166,8 @@ set(XRAY_ALL_SOURCE_FILES
${mips64_SOURCES}
${mips64el_SOURCES}
${powerpc64le_SOURCES}
+ ${riscv32_SOURCES}
+ ${riscv64_SOURCES}
${XRAY_IMPL_HEADERS}
)
list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES)
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index b6f0e6762f1681..4ec492c266d809 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -57,6 +57,10 @@ static const int16_t cSledLength = 64;
static const int16_t cSledLength = 8;
#elif defined(__hexagon__)
static const int16_t cSledLength = 20;
+#elif defined(__riscv) && (__riscv_xlen == 64)
+static const int16_t cSledLength = 68;
+#elif defined(__riscv) && (__riscv_xlen == 32)
+static const int16_t cSledLength = 52;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */
diff --git a/compiler-rt/lib/xray/xray_riscv.cpp b/compiler-rt/lib/xray/xray_riscv.cpp
new file mode 100644
index 00000000000000..e3a7cdb18b6407
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_riscv.cpp
@@ -0,0 +1,266 @@
+//===-- xray_riscv.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of RISC-V specific routines (32- and 64-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+ PO_ADDI = 0x00000013, // addi rd, rs1, imm (I-type)
+ PO_ADD = 0x00000033, // add rd, rs1, rs2 (R-type)
+ PO_SW = 0x00002023, // sw rs2, imm(rs1) (S-type)
+ PO_SD = 0x00003023, // sd rs2, imm(rs1) (S-type)
+ PO_LUI = 0x00000037, // lui rd, imm (U-type)
+ PO_OR = 0x00006033, // or rd, rs1, rs2 (R-type; not referenced elsewhere in this file)
+ PO_SLLI = 0x00001013, // slli rd, rs1, shamt (encoded through the I-type helper)
+ PO_JALR = 0x00000067, // jalr rd, rs1 (I-type, imm = 0 when used for the hook call)
+ PO_LW = 0x00002003, // lw rd, imm(rs1) (I-type)
+ PO_LD = 0x00003003, // ld rd, imm(rs1) (I-type)
+ PO_J = 0x0000006f, // jal rd, imm (J-type); used with rd = x0 as a plain jump
+ PO_NOP = PO_ADDI, // addi x0, x0, 0
+};
+
+enum RegNum : uint32_t {
+ RN_X0 = 0, // x0; used as rd of the jump that skips an unpatched sled
+ RN_RA = 1, // ra; return address, also holds the trampoline address in the sled
+ RN_SP = 2, // sp; stack pointer
+ RN_T1 = 6, // t1; scratch for the upper 32 address bits on RV64
+ RN_A0 = 10, // a0; carries the function id to the trampoline
+};
+
+static inline uint32_t encodeRTypeInstruction(uint32_t Opcode, uint32_t Rs1,
+ uint32_t Rs2, uint32_t Rd) {
+ return Rs2 << 20 | Rs1 << 15 | Rd << 7 | Opcode; // rs2 at bit 20, rs1 at bit 15, rd at bit 7
+}
+
+static inline uint32_t encodeITypeInstruction(uint32_t Opcode, uint32_t Rs1,
+ uint32_t Rd, uint32_t Imm) {
+ return Imm << 20 | Rs1 << 15 | Rd << 7 | Opcode; // imm[11:0] lands in bits 31:20; higher Imm bits are shifted out
+}
+
+static inline uint32_t encodeSTypeInstruction(uint32_t Opcode, uint32_t Rs1,
+ uint32_t Rs2, uint32_t Imm) {
+ uint32_t ImmMSB = (Imm & 0xfe0) << 20; // imm[11:5] -> bits 31:25
+ uint32_t ImmLSB = (Imm & 0x01f) << 7; // imm[4:0] -> bits 11:7
+ return ImmMSB | Rs2 << 20 | Rs1 << 15 | ImmLSB | Opcode; // rs2 at bit 20, rs1 at bit 15
+}
+
+static inline uint32_t encodeUTypeInstruction(uint32_t Opcode, uint32_t Rd,
+ uint32_t Imm) {
+ return Imm << 12 | Rd << 7 | Opcode; // Imm is the already-shifted-down 20-bit value (e.g. from hi20)
+}
+
+static inline uint32_t encodeJTypeInstruction(uint32_t Opcode, uint32_t Rd,
+ uint32_t Imm) {
+ uint32_t ImmMSB = (Imm & 0x100000) << 11; // imm[20] -> bit 31
+ uint32_t ImmLSB = (Imm & 0x7fe) << 20; // imm[10:1] -> bits 30:21 (bit 0 of Imm is dropped)
+ uint32_t Imm11 = (Imm & 0x800) << 9; // imm[11] -> bit 20
+ uint32_t Imm1912 = (Imm & 0xff000); // imm[19:12] stays in bits 19:12
+ return ImmMSB | ImmLSB | Imm11 | Imm1912 | Rd << 7 | Opcode;
+}
+
+static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } // %hi: upper 20 bits, +0x800 compensates for the sign-extended low 12 bits added by addi
+static uint32_t lo12(uint32_t val) { return val & 0xfff; } // %lo: low 12 bits
+
+static inline bool patchSled(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled,
+                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // Patch (|Enable|) or unpatch the sled at Sled.address(); returns true.
+  // When |Enable| == true, we replace the compile-time stub (sled):
+  //
+  // xray_sled_n:
+  //   J .tmpN
+  //   21 or 33 C.NOPs (42 or 66 bytes)
+  //   .tmpN
+  //
+  // With one of the following runtime patches:
+  //
+  // xray_sled_n (32-bit):
+  //   addi sp, sp, -16                                ;create stack frame
+  //   sw ra, 12(sp)                                   ;save return address
+  //   sw a0, 8(sp)                                    ;save register a0
+  //   lui ra, %hi(__xray_FunctionEntry/Exit)
+  //   addi ra, ra, %lo(__xray_FunctionEntry/Exit)
+  //   lui a0, %hi(function_id)
+  //   addi a0, a0, %lo(function_id)                   ;pass function id
+  //   jalr ra                                         ;call Tracing hook
+  //   lw a0, 8(sp)                                    ;restore register a0
+  //   lw ra, 12(sp)                                   ;restore return address
+  //   addi sp, sp, 16                                 ;delete stack frame
+  //
+  // xray_sled_n (64-bit):
+  //   addi sp, sp, -32                                ;create stack frame
+  //   sd ra, 24(sp)                                   ;save return address
+  //   sd a0, 16(sp)                                   ;save register a0
+  //   sd t1, 8(sp)                                    ;save register t1
+  //   lui t1, %highest(__xray_FunctionEntry/Exit)
+  //   addi t1, t1, %higher(__xray_FunctionEntry/Exit)
+  //   slli t1, t1, 32
+  //   lui ra, %hi(__xray_FunctionEntry/Exit)
+  //   addi ra, ra, %lo(__xray_FunctionEntry/Exit)
+  //   add ra, t1, ra
+  //   lui a0, %hi(function_id)
+  //   addi a0, a0, %lo(function_id)                   ;pass function id
+  //   jalr ra                                         ;call Tracing hook
+  //   ld t1, 8(sp)                                    ;restore register t1
+  //   ld a0, 16(sp)                                   ;restore register a0
+  //   ld ra, 24(sp)                                   ;restore return address
+  //   addi sp, sp, 32                                 ;delete stack frame
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
+  //
+  // When |Enable|==false, we set back the first instruction in the sled to be
+  //   J 44 bytes (rv32)
+  //   J 68 bytes (rv64)
+
+  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+  if (Enable) {
+#if __riscv_xlen == 64
+    // If the ISA is RV64, the Tracing Hook needs to be typecast to a 64 bit
+    // value.
+    uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint64_t>(TracingHook));
+    uint32_t HiTracingHookAddr = hi20(reinterpret_cast<uint64_t>(TracingHook));
+    uint32_t HigherTracingHookAddr =
+        lo12((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32);
+    uint32_t HighestTracingHookAddr =
+        hi20((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32);
+#elif __riscv_xlen == 32
+    // We typecast the Tracing Hook to a 32 bit value for RV32
+    uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint32_t>(TracingHook));
+    uint32_t HiTracingHookAddr = hi20(reinterpret_cast<uint32_t>(TracingHook));
+#endif
+    uint32_t LoFunctionID = lo12(FuncId);
+    uint32_t HiFunctionID = hi20(FuncId);
+
+    // The sled that is patched in for RISCV64 defined below. We need the entire
+    // sleds corresponding to both ISAs to be protected by defines because the
+    // first few instructions are all different, because we store doubles in
+    // case of RV64 and store words for RV32. Subsequently, we have LUI - and in
+    // case of RV64, we need extra instructions from this point on, so we see
+    // differences in addresses to which instructions are stored.
+    size_t Idx = 1U;
+    const uint32_t XLenBytes = __riscv_xlen / 8;
+#if __riscv_xlen == 64
+    const uint32_t LoadOp = PatchOpcodes::PO_LD;
+    const uint32_t StoreOp = PatchOpcodes::PO_SD;
+#elif __riscv_xlen == 32
+    const uint32_t LoadOp = PatchOpcodes::PO_LW;
+    const uint32_t StoreOp = PatchOpcodes::PO_SW;
+#endif
+
+    Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
+                                            RegNum::RN_RA, 3 * XLenBytes);
+    Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
+                                            RegNum::RN_A0, 2 * XLenBytes);
+
+#if __riscv_xlen == 64
+    Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
+                                            RegNum::RN_T1, XLenBytes);
+    Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1,
+                                            HighestTracingHookAddr);
+    Address[Idx++] =
+        encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1,
+                               RegNum::RN_T1, HigherTracingHookAddr);
+    Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_SLLI,
+                                            RegNum::RN_T1, RegNum::RN_T1, 32);
+#endif
+    Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_RA,
+                                            HiTracingHookAddr);
+    Address[Idx++] = encodeITypeInstruction(
+        PatchOpcodes::PO_ADDI, RegNum::RN_RA, RegNum::RN_RA, LoTracingHookAddr);
+#if __riscv_xlen == 64
+    Address[Idx++] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_RA,
+                                            RegNum::RN_T1, RegNum::RN_RA);
+#endif
+    Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0,
+                                            HiFunctionID);
+    Address[Idx++] = encodeITypeInstruction(
+        PatchOpcodes::PO_ADDI, RegNum::RN_A0, RegNum::RN_A0, LoFunctionID);
+    Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_JALR,
+                                            RegNum::RN_RA, RegNum::RN_RA, 0);
+
+#if __riscv_xlen == 64
+    Address[Idx++] =
+        encodeITypeInstruction(LoadOp, RegNum::RN_SP, RegNum::RN_T1, XLenBytes);
+#endif
+    Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP,
+                                            RegNum::RN_A0, 2 * XLenBytes);
+    Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP,
+                                            RegNum::RN_RA, 3 * XLenBytes);
+    Address[Idx++] = encodeITypeInstruction(
+        PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 4 * XLenBytes);
+
+    uint32_t CreateStackSpace = encodeITypeInstruction(
+        PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, -4 * XLenBytes);
+    // Word 0 is written last (release store): this is what activates the sled.
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace,
+        std::memory_order_release);
+  } else {
+    uint32_t CreateBranch = encodeJTypeInstruction(
+    // Jump distance is different in both ISAs due to difference in size of
+    // sleds
+#if __riscv_xlen == 64
+        PatchOpcodes::PO_J, RegNum::RN_X0,
+        68); // jump encodes an offset of 68
+#elif __riscv_xlen == 32
+        PatchOpcodes::PO_J, RegNum::RN_X0,
+        44); // jump encodes an offset of 44
+#endif
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateBranch,
+        std::memory_order_release);
+  }
+  return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ const XRayTrampolines &Trampolines,
+ bool LogArgs) XRAY_NEVER_INSTRUMENT {
+ // We don't support logging argument at this moment, so we always
+ // use EntryTrampoline.
+ return patchSled(Enable, FuncId, Sled, Trampolines.EntryTrampoline); // LogArgs is deliberately unused
+}
+
+bool patchFunctionExit(
+ const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+ const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline); // same sled patch as entry, targeting the exit trampoline
+}
+
+bool patchFunctionTailExit(
+ const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
+ const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampolines.TailExitTrampoline); // same sled patch as entry, targeting the tail-exit trampoline
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return false; // nothing is patched: all arguments are ignored
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return false; // nothing is patched: all arguments are ignored
+}
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {} // empty stub; patchFunctionEntry never selects an argument-logging trampoline
diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv32.S b/compiler-rt/lib/xray/xray_trampoline_riscv32.S
new file mode 100644
index 00000000000000..05e3d61e5ef71b
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_trampoline_riscv32.S
@@ -0,0 +1,89 @@
+//===-- xray_trampoline_riscv32.s ----------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the riscv32-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../sanitizer_common/sanitizer_asm.h"
+
+.macro SAVE_ARG_REGISTERS
+ // Push ra, a0-a7 and fa0-fa7 onto a 112-byte stack frame
+ addi sp, sp, -112
+ CFI_DEF_CFA_OFFSET(112)
+ sw ra, 108(sp)
+ sw a7, 104(sp)
+ sw a6, 100(sp)
+ sw a5, 96(sp)
+ sw a4, 92(sp)
+ sw a3, 88(sp)
+ sw a2, 84(sp)
+ sw a1, 80(sp)
+ sw a0, 76(sp) // 72-75(sp) is left unused
+ fsd fa7, 64(sp)
+ fsd fa6, 56(sp)
+ fsd fa5, 48(sp)
+ fsd fa4, 40(sp)
+ fsd fa3, 32(sp)
+ fsd fa2, 24(sp)
+ fsd fa1, 16(sp)
+ fsd fa0, 8(sp) // 0-7(sp) is left unused
+.endm
+
+.macro RESTORE_ARG_REGISTERS
+ // Reload everything SAVE_ARG_REGISTERS stored, then pop its 112-byte frame
+ fld fa0, 8(sp)
+ fld fa1, 16(sp)
+ fld fa2, 24(sp)
+ fld fa3, 32(sp)
+ fld fa4, 40(sp)
+ fld fa5, 48(sp)
+ fld fa6, 56(sp)
+ fld fa7, 64(sp)
+ lw a0, 76(sp)
+ lw a1, 80(sp)
+ lw a2, 84(sp)
+ lw a3, 88(sp)
+ lw a4, 92(sp)
+ lw a5, 96(sp)
+ lw a6, 100(sp)
+ lw a7, 104(sp)
+ lw ra, 108(sp)
+ addi sp, sp, 112
+ CFI_DEF_CFA_OFFSET(0)
+.endm
+
+.macro SAVE_RET_REGISTERS
+ // Push ra, a0-a1 and fa0-fa1 onto a 32-byte stack frame
+ addi sp, sp, -32
+ CFI_DEF_CFA_OFFSET(32)
+ sw ra, 28(sp)
+ sw a1, 24(sp)
+ sw a0, 20(sp) // 16-19(sp) is left unused
+ fsd fa1, 8(sp)
+ fsd fa0, 0(sp)
+.endm
+
+.macro RESTORE_RET_REGISTERS
+ // Reload everything SAVE_RET_REGISTERS stored, then pop its 32-byte frame
+ fld fa0, 0(sp)
+ fld fa1, 8(sp)
+ lw a0, 20(sp)
+ lw a1, 24(sp)
+ lw ra, 28(sp)
+ addi sp, sp, 32
+ CFI_DEF_CFA_OFFSET(0)
+.endm
+
+.macro LOAD_XLEN, rd, src
+ lw \rd, \src // 32-bit load; used by the shared trampolines to read the handler pointer
+.endm
+
+#include "xray_trampoline_riscv_common.S"
diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv64.S b/compiler-rt/lib/xray/xray_trampoline_riscv64.S
new file mode 100644
index 00000000000000..692350eaaa38e0
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_trampoline_riscv64.S
@@ -0,0 +1,89 @@
+//===-- xray_trampoline_riscv64.s ----------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the riscv64-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../sanitizer_common/sanitizer_asm.h"
+
+.macro SAVE_ARG_REGISTERS
+ // Push argument registers to stack
+ addi sp, sp, -144
+ CFI_DEF_CFA_OFFSET(144)
+ sd ra, 136(sp)
+ sd a7, 128(sp)
+ sd a6, 120(sp)
+ sd a5, 112(sp)
+ sd a4, 104(sp)
+ sd a3, 96(sp)
+ sd a2, 88(sp)
+ sd a1, 80(sp)
+ sd a0, 72(sp)
+ fsd fa7, 64(sp)
+ fsd fa6, 56(sp)
+ fsd fa5, 48(sp)
+ fsd fa4, 40(sp)
+ fsd fa3, 32(sp)
+ fsd fa2, 24(sp)
+ fsd fa1, 16(sp)
+ fsd fa0, 8(sp) // 0-7(sp) is left unused
+.endm
+
+.macro SAVE_RET_REGISTERS
+ // Push ra, a0-a1 and fa0-fa1 onto a 48-byte stack frame
+ addi sp, sp, -48
+ CFI_DEF_CFA_OFFSET(48)
+ sd ra, 40(sp)
+ sd a1, 32(sp)
+ sd a0, 24(sp)
+ fsd fa1, 16(sp)
+ fsd fa0, 8(sp) // 0-7(sp) is left unused
+.endm
+
+.macro RESTORE_RET_REGISTERS
+ // Reload everything SAVE_RET_REGISTERS stored, then pop its 48-byte frame
+ fld fa0, 8(sp)
+ fld fa1, 16(sp)
+ ld a0, 24(sp)
+ ld a1, 32(sp)
+ ld ra, 40(sp)
+ addi sp, sp, 48
+ CFI_DEF_CFA_OFFSET(0)
+.endm
+
+.macro RESTORE_ARG_REGISTERS
+ // Reload everything SAVE_ARG_REGISTERS stored, then pop its 144-byte frame
+ fld fa0, 8(sp)
+ fld fa1, 16(sp)
+ fld fa2, 24(sp)
+ fld fa3, 32(sp)
+ fld fa4, 40(sp)
+ fld fa5, 48(sp)
+ fld fa6, 56(sp)
+ fld fa7, 64(sp)
+ ld a0, 72(sp)
+ ld a1, 80(sp)
+ ld a2, 88(sp)
+ ld a3, 96(sp)
+ ld a4, 104(sp)
+ ld a5, 112(sp)
+ ld a6, 120(sp)
+ ld a7, 128(sp)
+ ld ra, 136(sp)
+ addi sp, sp, 144
+ CFI_DEF_CFA_OFFSET(0)
+.endm
+
+.macro LOAD_XLEN, rd, src
+ ld \rd, \src // 64-bit load; used by the shared trampolines to read the handler pointer
+.endm
+
+#include "xray_trampoline_riscv_common.S"
diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv_common.S b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S
new file mode 100644
index 00000000000000..746d612e982045
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S
@@ -0,0 +1,96 @@
+//===-- xray_trampoline_riscv_common.s --------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the trampolines code shared between riscv32 and riscv64.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../builtins/assembly.h"
+
+ .text
+ .p2align 2
+ .global ASM_SYMBOL(__xray_FunctionEntry)
+ ASM_TYPE_FUNCTION(__xray_FunctionEntry)
+ASM_SYMBOL(__xray_FunctionEntry):
+ CFI_STARTPROC
+ SAVE_ARG_REGISTERS
+
+ // Load the handler pointer stored in __xray::XRayPatchedFunction into a2
+ la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
+ LOAD_XLEN a2, 0(a2)
+
+ // Handler address will be null if it is not set; skip the call in that case
+ beq a2, x0, 1f
+
+ // If we reach here, we are tracing an event
+ // a0 already contains function id (loaded by the patched sled)
+ // a1 = 0 means we are tracing an entry event
+ li a1, 0
+ jalr a2
+
+1:
+ RESTORE_ARG_REGISTERS
+ jr ra
+ ASM_SIZE(__xray_FunctionEntry)
+ CFI_ENDPROC
+
+ .text
+ .p2align 2
+ .global ASM_SYMBOL(__xray_FunctionExit)
+ ASM_TYPE_FUNCTION(__xray_FunctionExit)
+ASM_SYMBOL(__xray_FunctionExit):
+ CFI_STARTPROC
+ SAVE_RET_REGISTERS
+
+ // Load the handler pointer stored in __xray::XRayPatchedFunction into a2
+ la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
+ LOAD_XLEN a2, 0(a2)
+
+ // Handler address will be null if it is not set; skip the call in that case
+ beq a2, x0, 1f
+
+ // If we reach here, we are tracing an event
+ // a0 already contains function id (loaded by the patched sled)
+ // a1 = 1 means we are tracing an exit event
+ li a1, 1
+ jalr a2
+
+1:
+ RESTORE_RET_REGISTERS
+ jr ra
+ ASM_SIZE(__xray_FunctionExit)
+ CFI_ENDPROC
+
+ .text
+ .p2align 2
+ .global ASM_SYMBOL(__xray_FunctionTailExit)
+ ASM_TYPE_FUNCTION(__xray_FunctionTailExit)
+ASM_SYMBOL(__xray_FunctionTailExit):
+ CFI_STARTPROC
+ SAVE_ARG_REGISTERS
+
+ // Load the handler pointer stored in __xray::XRayPatchedFunction into a2
+ la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
+ LOAD_XLEN a2, 0(a2)
+
+ // Handler address will be null if it is not set; skip the call in that case
+ beq a2, x0, 1f
+
+ // If we reach here, we are tracing an event
+ // a0 already contains function id (loaded by the patched sled)
+ // a1 = 2 means we are tracing a tail exit event
+ li a1, 2
+ jalr a2
+
+1:
+ RESTORE_ARG_REGISTERS
+ jr ra
+ ASM_SIZE(__xray_FunctionTailExit)
+ CFI_ENDPROC
diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h
index e1cafe1bf11d2d..b62a686d6ce0f2 100644
--- a/compiler-rt/lib/xray/xray_tsc.h
+++ b/compiler-rt/lib/xray/xray_tsc.h
@@ -43,7 +43,7 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
#elif defined(__powerpc64__)
#include "xray_powerpc64.inc"
#elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
- defined(__hexagon__) || defined(__loongarch_lp64)
+ defined(__hexagon__) || defined(__loongarch_lp64) || defined(__riscv)
// Emulated TSC.
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
// not have a constant frequency like TSC on x86(_64), it may go faster
diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 8f718d884cd067..8af16fa6249f41 100644
--- a/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -233,10 +233,13 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::mips:
case Triple::ArchType::mipsel:
case Triple::ArchType::mips64:
- case Triple::ArchType::mips64el: {
+ case Triple::ArchType::mips64el:
+ case Triple::ArchType::riscv32:
+ case Triple::ArchType::riscv64: {
// For the architectures which don't have a single return instruction
InstrumentationOptions op;
- op.HandleTailcall = false;
+ // RISC-V supports patching tail calls.
+ op.HandleTailcall = MF.getTarget().getTargetTriple().isRISCV();
op.HandleAllReturns = true;
prependRetWithPatchableExit(MF, TII, op);
break;
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 0d818bc837fb70..b1990409754b08 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -113,6 +113,12 @@ class RISCVAsmPrinter : public AsmPrinter {
void emitNTLHint(const MachineInstr *MI);
+ // XRay Support
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI);
+ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI);
+ void LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI);
+ void emitSled(const MachineInstr *MI, SledKind Kind);
+
bool lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
};
}
@@ -316,6 +322,22 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
return LowerPATCHPOINT(*OutStreamer, SM, *MI);
case TargetOpcode::STATEPOINT:
return LowerSTATEPOINT(*OutStreamer, SM, *MI);
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
+ // patchable-function-entry is handled in lowerToMCInst
+ // Therefore, we break out of the switch statement if we encounter it here.
+ const Function &F = MI->getParent()->getParent()->getFunction();
+ if (F.hasFnAttribute("patchable-function-entry"))
+ break;
+
+ LowerPATCHABLE_FUNCTION_ENTER(MI);
+ return;
+ }
+ case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
+ LowerPATCHABLE_FUNCTION_EXIT(MI);
+ return;
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ LowerPATCHABLE_TAIL_CALL(MI);
+ return;
}
MCInst OutInst;
@@ -453,11 +475,71 @@ bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
emitFunctionBody();
+ // Emit the XRay table
+ emitXRayTable();
+
if (EmittedOptionArch)
RTS.emitDirectiveOptionPop();
return false;
}
+void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI) {
+ emitSled(MI, SledKind::FUNCTION_ENTER); // sled is recorded with kind FUNCTION_ENTER
+}
+
+void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI) {
+ emitSled(MI, SledKind::FUNCTION_EXIT); // sled is recorded with kind FUNCTION_EXIT
+}
+
+void RISCVAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI) {
+ emitSled(MI, SledKind::TAIL_CALL); // sled is recorded with kind TAIL_CALL
+}
+
+void RISCVAsmPrinter::emitSled(const MachineInstr *MI, SledKind Kind) {
+ // We want to emit the jump instruction and the nops constituting the sled.
+ // The format is as follows:
+ // ALIGN (4 bytes)
+ // .Lxray_sled_N
+ // J .tmpN
+ // 21 or 33 C.NOP instructions
+ // .tmpN
+
+ // The following variable holds the count of the number of NOPs to be patched
+ // in for XRay instrumentation during compilation.
+ // Note that RV64 and RV32 each has a sled of 68 and 44 bytes, respectively.
+ // Assuming we're using JAL to jump to .tmpN, then we only need
+ // (68 - 4)/2 = 32 NOPs for RV64 and (44 - 4)/2 = 20 for RV32. However, there
+ // is a chance that we'll use C.J instead, so an additional NOP is needed.
+ const uint8_t NoopsInSledCount =
+ MI->getParent()->getParent()->getSubtarget<RISCVSubtarget>().is64Bit()
+ ? 33
+ : 21;
+
+ OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo());
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->emitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+ const MCExpr *TargetExpr = MCSymbolRefExpr::create(
+ Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext);
+
+ // Emit "J bytes" instruction, which jumps over the nop sled to the actual
+ // start of function.
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addExpr(TargetExpr));
+
+ // Emit NOP instructions (written as ADDI x0, x0, 0)
+ for (int8_t I = 0; I < NoopsInSledCount; ++I)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::ADDI)
+ .addReg(RISCV::X0)
+ .addReg(RISCV::X0)
+ .addImm(0));
+
+ OutStreamer->emitLabel(Target);
+ recordSled(CurSled, *MI, Kind, 2);
+}
+
void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) {
RISCVTargetStreamer &RTS =
static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 47273d6bc06d65..6a3a89371b57a0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1576,6 +1576,26 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// No patch bytes means at most a PseudoCall is emitted
return std::max(NumBytes, 8U);
}
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+ case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
+ case TargetOpcode::PATCHABLE_TAIL_CALL: {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const Function &F = MF.getFunction();
+ if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER &&
+ F.hasFnAttribute("patchable-function-entry")) {
+ unsigned Num;
+ if (F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, Num))
+ return get(Opcode).getSize();
+
+ // Number of C.NOP or NOP
+ return (STI.hasStdExtCOrZca() ? 2 : 4) * Num;
+ }
+ // XRay uses C.J + 21 or 33 C.NOP for each sled in RV32 and RV64,
+ // respectively.
+ return STI.is64Bit() ? 68 : 44;
+ }
default:
return get(Opcode).getSize();
}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 5e775d2f87bd94..9a1881c2d39837 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -236,6 +236,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
return UserReservedRegister[i];
}
+ // XRay support - require D (fsd/fld in the trampolines) and C (C.NOP sleds).
+ bool isXRaySupported() const override { return hasStdExtD() && hasStdExtC(); }
+
// Vector codegen related methods.
bool hasVInstructions() const { return HasStdExtZve32x; }
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp
index 800f0a0f47e425..0ebdcd5bac7526 100644
--- a/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/llvm/lib/XRay/InstrumentationMap.cpp
@@ -63,7 +63,8 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
ObjFile.getBinary()->getArch() == Triple::loongarch64 ||
ObjFile.getBinary()->getArch() == Triple::ppc64le ||
ObjFile.getBinary()->getArch() == Triple::arm ||
- ObjFile.getBinary()->getArch() == Triple::aarch64))
+ ObjFile.getBinary()->getArch() == Triple::aarch64 ||
+ ObjFile.getBinary()->getArch() == Triple::riscv64))
return make_error<StringError>(
"File format not supported (only does ELF and Mach-O little endian "
"64-bit).",
diff --git a/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll
new file mode 100644
index 00000000000000..ec2b986d174d91
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=riscv32-unknown-linux-gnu -mattr=+d,+c < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=riscv64-unknown-linux-gnu -mattr=+d,+c < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-RISCV64 %s
+
+define i32 @foo() nounwind "function-instrument"="xray-always" {
+; CHECK: .p2align 2
+; CHECK-LABEL: .Lxray_sled_0:
+; CHECK-NEXT: j .Ltmp0
+; CHECK-COUNT-21: nop
+; CHECK-RISCV64-COUNT-12: nop
+; CHECK-LABEL: .Ltmp0:
+ ret i32 0
+; CHECK: .p2align 2
+; CHECK-LABEL: .Lxray_sled_1:
+; CHECK-NEXT: j .Ltmp1
+; CHECK-COUNT-21: nop
+; CHECK-RISCV64-COUNT-12: nop
+; CHECK-LABEL: .Ltmp1:
+; CHECK-NEXT: ret
+}
+; CHECK: .section xray_instr_map,"ao",@progbits,foo
+; CHECK-LABEL: .Lxray_sleds_start0:
+; CHECK: .Lxray_sled_0-[[TMP:.Ltmp[0-9]+]]
+; CHECK: .Lxray_sled_1-[[TMP:.Ltmp[0-9]+]]
+; CHECK-LABEL: .Lxray_sleds_end0:
More information about the llvm-commits
mailing list