[libunwind] fc1c478 - [libunwind] Replace process_vm_readv with SYS_rt_sigprocmask (#74791)

via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 5 12:56:06 PST 2024


Author: Jordan R AW
Date: 2024-01-05T12:56:02-08:00
New Revision: fc1c478709e380164733560e4a2c8f9e8d5e2c1c

URL: https://github.com/llvm/llvm-project/commit/fc1c478709e380164733560e4a2c8f9e8d5e2c1c
DIFF: https://github.com/llvm/llvm-project/commit/fc1c478709e380164733560e4a2c8f9e8d5e2c1c.diff

LOG: [libunwind] Replace process_vm_readv with SYS_rt_sigprocmask (#74791)

process_vm_readv is generally considered dangerous from a syscall
perspective, and is frequently blanket banned in seccomp filters such as
those in Chromium and ChromiumOS. We can get the same behaviour during
the invalid PC address case with the raw SYS_rt_sigprocmask syscall.

Testing to ensure that process_vm_readv does not appear, I ran the
output of check-unwind on an ARM64 device under strace. Previously,
bad_unwind_info in particular would use process_vm_readv, but with this
commit, it now no longer uses it:

```
strace test/Output/bad_unwind_info.pass.cpp.dir/t.tmp.exe \
  |& grep process_vm_readv
```

The libunwind unittests were also tested on ARM64 ChromeOS (Gentoo
Linux) devices.

Added: 
    

Modified: 
    libunwind/src/UnwindCursor.hpp

Removed: 
    


################################################################################
diff  --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 647a5a9c9d92d9..8517d328bd058b 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -12,6 +12,8 @@
 #define __UNWINDCURSOR_HPP__
 
 #include "cet_unwind.h"
+#include <errno.h>
+#include <signal.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -990,6 +992,7 @@ class UnwindCursor : public AbstractUnwindCursor{
     R dummy;
     return stepThroughSigReturn(dummy);
   }
+  bool isReadableAddr(const pint_t addr) const;
 #if defined(_LIBUNWIND_TARGET_AARCH64)
   bool setInfoForSigReturn(Registers_arm64 &);
   int stepThroughSigReturn(Registers_arm64 &);
@@ -2700,20 +2703,12 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_arm64 &) {
   // [1] https://github.com/torvalds/linux/blob/master/arch/arm64/kernel/vdso/sigreturn.S
   const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP));
   // The PC might contain an invalid address if the unwind info is bad, so
-  // directly accessing it could cause a segfault. Use process_vm_readv to read
-  // the memory safely instead. process_vm_readv was added in Linux 3.2, and
-  // AArch64 supported was added in Linux 3.7, so the syscall is guaranteed to
-  // be present. Unfortunately, there are Linux AArch64 environments where the
-  // libc wrapper for the syscall might not be present (e.g. Android 5), so call
-  // the syscall directly instead.
-  uint32_t instructions[2];
-  struct iovec local_iov = {&instructions, sizeof instructions};
-  struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof instructions};
-  long bytesRead =
-      syscall(SYS_process_vm_readv, getpid(), &local_iov, 1, &remote_iov, 1, 0);
+  // directly accessing it could cause a SIGSEGV.
+  if (!isReadableAddr(pc))
+    return false;
+  auto *instructions = reinterpret_cast<const uint32_t *>(pc);
   // Look for instructions: mov x8, #0x8b; svc #0x0
-  if (bytesRead != sizeof instructions || instructions[0] != 0xd2801168 ||
-      instructions[1] != 0xd4000001)
+  if (instructions[0] != 0xd2801168 || instructions[1] != 0xd4000001)
     return false;
 
   _info = {};
@@ -2762,18 +2757,17 @@ int UnwindCursor<A, R>::stepThroughSigReturn(Registers_arm64 &) {
 template <typename A, typename R>
 bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_riscv &) {
   const pint_t pc = static_cast<pint_t>(getReg(UNW_REG_IP));
-  uint32_t instructions[2];
-  struct iovec local_iov = {&instructions, sizeof instructions};
-  struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof instructions};
-  long bytesRead =
-      syscall(SYS_process_vm_readv, getpid(), &local_iov, 1, &remote_iov, 1, 0);
+  // The PC might contain an invalid address if the unwind info is bad, so
+  // directly accessing it could cause a SIGSEGV.
+  if (!isReadableAddr(pc))
+    return false;
+  const auto *instructions = reinterpret_cast<const uint32_t *>(pc);
   // Look for the two instructions used in the sigreturn trampoline
   // __vdso_rt_sigreturn:
   //
   // 0x08b00893 li a7,0x8b
   // 0x00000073 ecall
-  if (bytesRead != sizeof instructions || instructions[0] != 0x08b00893 ||
-      instructions[1] != 0x00000073)
+  if (instructions[0] != 0x08b00893 || instructions[1] != 0x00000073)
     return false;
 
   _info = {};
@@ -2822,13 +2816,11 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_s390x &) {
   // onto the stack.
   const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP));
   // The PC might contain an invalid address if the unwind info is bad, so
-  // directly accessing it could cause a segfault. Use process_vm_readv to
-  // read the memory safely instead.
-  uint16_t inst;
-  struct iovec local_iov = {&inst, sizeof inst};
-  struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof inst};
-  long bytesRead = process_vm_readv(getpid(), &local_iov, 1, &remote_iov, 1, 0);
-  if (bytesRead == sizeof inst && (inst == 0x0a77 || inst == 0x0aad)) {
+  // directly accessing it could cause a SIGSEGV.
+  if (!isReadableAddr(pc))
+    return false;
+  const auto inst = *reinterpret_cast<const uint16_t *>(pc);
+  if (inst == 0x0a77 || inst == 0x0aad) {
     _info = {};
     _info.start_ip = pc;
     _info.end_ip = pc + 2;
@@ -2974,6 +2966,37 @@ bool UnwindCursor<A, R>::getFunctionName(char *buf, size_t bufLen,
                                          buf, bufLen, offset);
 }
 
+#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN)
+template <typename A, typename R>
+bool UnwindCursor<A, R>::isReadableAddr(const pint_t addr) const {
+  // We use SYS_rt_sigprocmask, inspired by Abseil's AddressIsReadable.
+
+  const auto sigsetAddr = reinterpret_cast<sigset_t *>(addr);
+  // We have to check that addr is nullptr because sigprocmask allows that
+  // as an argument without failure.
+  if (!sigsetAddr)
+    return false;
+  const auto saveErrno = errno;
+  // We MUST use a raw syscall here, as wrappers may try to access
+  // sigsetAddr which may cause a SIGSEGV. A raw syscall however is
+  // safe. Additionally, we need to pass the kernel_sigset_size, which is
+  // 
diff erent from libc sizeof(sigset_t). For the majority of architectures,
+  // it's 64 bits (_NSIG), and libc NSIG is _NSIG + 1.
+  const auto kernelSigsetSize = NSIG / 8;
+  [[maybe_unused]] const int Result = syscall(
+      SYS_rt_sigprocmask, /*how=*/~0, sigsetAddr, nullptr, kernelSigsetSize);
+  // Because our "how" is invalid, this syscall should always fail, and our
+  // errno should always be EINVAL or an EFAULT. This relies on the Linux
+  // kernel to check copy_from_user before checking if the "how" argument is
+  // invalid.
+  assert(Result == -1);
+  assert(errno == EFAULT || errno == EINVAL);
+  const auto readable = errno != EFAULT;
+  errno = saveErrno;
+  return readable;
+}
+#endif
+
 #if defined(_LIBUNWIND_USE_CET)
 extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) {
   AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;


        


More information about the cfe-commits mailing list