[libunwind] [libunwind] Replace process_vm_readv with pipe (PR #74791)

Jordan R AW via cfe-commits cfe-commits at lists.llvm.org
Wed Dec 13 13:42:30 PST 2023


https://github.com/ajordanr-google updated https://github.com/llvm/llvm-project/pull/74791

>From 1f4df1b82970c95684eed93c8f6bcaa6d6507b88 Mon Sep 17 00:00:00 2001
From: Jordan R Abrahams-Whitehead <ajordanr at google.com>
Date: Fri, 8 Dec 2023 00:09:59 +0000
Subject: [PATCH 1/4] [libunwind] Replace process_vm_readv with pipe

process_vm_readv is generally considered dangerous from a syscall
perspective, and is frequently blanket banned in seccomp filters such as
those in Chromium and ChromiumOS. We can get the same behaviour during
the invalid PC address case with pipes and write/read.

Testing to ensure that process_vm_readv does not appear, I ran the
output of check-unwind on an ARM64 device under strace. Previously,
bad_unwind_info in particular would use process_vm_readv, but with this
commit, it now uses pipe2:

```
strace test/Output/bad_unwind_info.pass.cpp.dir/t.tmp.exe \
  |& grep process_vm_readv
strace test/Output/bad_unwind_info.pass.cpp.dir/t.tmp.exe \
  |& grep pipe2
```
---
 libunwind/src/UnwindCursor.hpp | 50 ++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 647a5a9c9d92d9..5e4e376220daa0 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -33,6 +33,7 @@
 #if defined(_LIBUNWIND_TARGET_LINUX) &&                                        \
     (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_RISCV) || \
      defined(_LIBUNWIND_TARGET_S390X))
+#include <fcntl.h>
 #include <sys/syscall.h>
 #include <sys/uio.h>
 #include <unistd.h>
@@ -2700,19 +2701,18 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_arm64 &) {
   // [1] https://github.com/torvalds/linux/blob/master/arch/arm64/kernel/vdso/sigreturn.S
   const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP));
   // The PC might contain an invalid address if the unwind info is bad, so
-  // directly accessing it could cause a segfault. Use process_vm_readv to read
-  // the memory safely instead. process_vm_readv was added in Linux 3.2, and
-  // AArch64 supported was added in Linux 3.7, so the syscall is guaranteed to
-  // be present. Unfortunately, there are Linux AArch64 environments where the
-  // libc wrapper for the syscall might not be present (e.g. Android 5), so call
-  // the syscall directly instead.
+  // directly accessing it could cause a segfault. Use pipe/write/read to read
+  // the memory safely instead.
+  int pipefd[2];
+  if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
+    return false;
   uint32_t instructions[2];
-  struct iovec local_iov = {&instructions, sizeof instructions};
-  struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof instructions};
-  long bytesRead =
-      syscall(SYS_process_vm_readv, getpid(), &local_iov, 1, &remote_iov, 1, 0);
+  const auto bufferSize = sizeof instructions;
+  if (write(pipefd[1], reinterpret_cast<void *>(pc), bufferSize) != bufferSize)
+    return false;
+  const auto bytesRead = read(pipefd[0], instructions, bufferSize);
   // Look for instructions: mov x8, #0x8b; svc #0x0
-  if (bytesRead != sizeof instructions || instructions[0] != 0xd2801168 ||
+  if (bytesRead != bufferSize || instructions[0] != 0xd2801168 ||
       instructions[1] != 0xd4000001)
     return false;
 
@@ -2762,17 +2762,20 @@ int UnwindCursor<A, R>::stepThroughSigReturn(Registers_arm64 &) {
 template <typename A, typename R>
 bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_riscv &) {
   const pint_t pc = static_cast<pint_t>(getReg(UNW_REG_IP));
+  int pipefd[2];
+  if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
+    return false;
   uint32_t instructions[2];
-  struct iovec local_iov = {&instructions, sizeof instructions};
-  struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof instructions};
-  long bytesRead =
-      syscall(SYS_process_vm_readv, getpid(), &local_iov, 1, &remote_iov, 1, 0);
+  const auto bufferSize = sizeof instructions;
+  if (write(pipefd[1], reinterpret_cast<void *>(pc), bufferSize) != bufferSize)
+    return false;
+  const auto bytesRead = read(pipefd[0], instructions, bufferSize);
   // Look for the two instructions used in the sigreturn trampoline
   // __vdso_rt_sigreturn:
   //
   // 0x08b00893 li a7,0x8b
   // 0x00000073 ecall
-  if (bytesRead != sizeof instructions || instructions[0] != 0x08b00893 ||
+  if (bytesRead != bufferSize || instructions[0] != 0x08b00893 ||
       instructions[1] != 0x00000073)
     return false;
 
@@ -2822,13 +2825,18 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_s390x &) {
   // onto the stack.
   const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP));
   // The PC might contain an invalid address if the unwind info is bad, so
-  // directly accessing it could cause a segfault. Use process_vm_readv to
+  // directly accessing it could cause a segfault. Use pipe/write/read to
   // read the memory safely instead.
   uint16_t inst;
-  struct iovec local_iov = {&inst, sizeof inst};
-  struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof inst};
-  long bytesRead = process_vm_readv(getpid(), &local_iov, 1, &remote_iov, 1, 0);
-  if (bytesRead == sizeof inst && (inst == 0x0a77 || inst == 0x0aad)) {
+  int pipefd[2];
+  if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
+    return false;
+  uint16_t inst;
+  const auto bufferSize = sizeof inst;
+  if (write(pipefd[1], reinterpret_cast<void *>(pc), bufferSize) != bufferSize)
+    return false;
+  const auto bytesRead = read(pipefd[0], &inst, bufferSize);
+  if (bytesRead == bufferSize && (inst == 0x0a77 || inst == 0x0aad)) {
     _info = {};
     _info.start_ip = pc;
     _info.end_ip = pc + 2;

>From c65626a68f2d47ac84ad8df4b3cfdfead43ffe63 Mon Sep 17 00:00:00 2001
From: Jordan R Abrahams-Whitehead <ajordanr at google.com>
Date: Fri, 8 Dec 2023 22:41:17 +0000
Subject: [PATCH 2/4] fixup! [libunwind] Replace process_vm_readv with pipe

---
 libunwind/src/UnwindCursor.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 5e4e376220daa0..c36be5a6c0ad1f 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -2827,7 +2827,6 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_s390x &) {
   // The PC might contain an invalid address if the unwind info is bad, so
   // directly accessing it could cause a segfault. Use pipe/write/read to
   // read the memory safely instead.
-  uint16_t inst;
   int pipefd[2];
   if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
     return false;

>From 03cf6f3ae42e84ce889e57f76cf2893eef9da0f3 Mon Sep 17 00:00:00 2001
From: Jordan R Abrahams-Whitehead <ajordanr at google.com>
Date: Wed, 13 Dec 2023 21:35:55 +0000
Subject: [PATCH 3/4] fixup! [libunwind] Replace process_vm_readv with pipe

---
 libunwind/src/UnwindCursor.hpp | 66 ++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index c36be5a6c0ad1f..165434dde74a7d 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -558,6 +558,26 @@ class UnwindCursor : public AbstractUnwindCursor {
       _info.handler = 0;
     return UNW_STEP_SUCCESS;
   }
+  bool isReadableAddr(const void *addr) const {
+    // This code is heavily based on Abseil's 'address_is_readable.cc',
+    // which is Copyright Abseil Authors (2017), and provided under
+    // the Apache License 2.0.
+
+    // We have to check that addr is a nullptr because sigprocmask allows that
+    // as an argument without failure.
+    if (!addr)
+      return false;
+    // Align to 8-bytes.
+    const auto uintptr_addr = reinterpret_cast<uintptr_t>(addr) & ~uintptr_t{7};
+    const auto sigsetaddr = reinterpret_cast<sigset_t *>(uintptr_addr);
+    [[maybe_unused]] int Result = sigprocmask(/*how=*/-1, sigsetaddr, nullptr);
+    // Because our "how" is invalid, this syscall should always fail, and our
+    // errno should always be EINVAL or an EFAULT. EFAULT is not guaranteed
+    // by the POSIX standard, so this is (for now) Linux specific.
+    assert(Result == -1);
+    assert(errno == EFAULT || errno == EINVAL);
+    return errno != EFAULT;
+  }
 
   A                   &_addressSpace;
   unw_proc_info_t      _info;
@@ -2701,19 +2721,13 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_arm64 &) {
   // [1] https://github.com/torvalds/linux/blob/master/arch/arm64/kernel/vdso/sigreturn.S
   const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP));
   // The PC might contain an invalid address if the unwind info is bad, so
-  // directly accessing it could cause a segfault. Use pipe/write/read to read
-  // the memory safely instead.
-  int pipefd[2];
-  if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
-    return false;
-  uint32_t instructions[2];
-  const auto bufferSize = sizeof instructions;
-  if (write(pipefd[1], reinterpret_cast<void *>(pc), bufferSize) != bufferSize)
+  // directly accessing it could cause a SIGSEGV.
+  if (!isReadableAddr(static_cast<void *>(pc)) ||
+      !isReadableAddr(static_cast<void *>(pc + 4)))
     return false;
-  const auto bytesRead = read(pipefd[0], instructions, bufferSize);
+  auto *instructions = static_cast<uint32_t *>(pc);
   // Look for instructions: mov x8, #0x8b; svc #0x0
-  if (bytesRead != bufferSize || instructions[0] != 0xd2801168 ||
-      instructions[1] != 0xd4000001)
+  if (instructions[0] != 0xd2801168 || instructions[1] != 0xd4000001)
     return false;
 
   _info = {};
@@ -2762,21 +2776,18 @@ int UnwindCursor<A, R>::stepThroughSigReturn(Registers_arm64 &) {
 template <typename A, typename R>
 bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_riscv &) {
   const pint_t pc = static_cast<pint_t>(getReg(UNW_REG_IP));
-  int pipefd[2];
-  if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
-    return false;
-  uint32_t instructions[2];
-  const auto bufferSize = sizeof instructions;
-  if (write(pipefd[1], reinterpret_cast<void *>(pc), bufferSize) != bufferSize)
+  // The PC might contain an invalid address if the unwind info is bad, so
+  // directly accessing it could cause a SIGSEGV.
+  if (!isReadableAddr(static_cast<void *>(pc)) ||
+      !isReadableAddr(static_cast<void *>(pc + 4)))
     return false;
-  const auto bytesRead = read(pipefd[0], instructions, bufferSize);
+  const auto *instructions = static_cast<uint32_t *>(pc);
   // Look for the two instructions used in the sigreturn trampoline
   // __vdso_rt_sigreturn:
   //
   // 0x08b00893 li a7,0x8b
   // 0x00000073 ecall
-  if (bytesRead != bufferSize || instructions[0] != 0x08b00893 ||
-      instructions[1] != 0x00000073)
+  if (instructions[0] != 0x08b00893 || instructions[1] != 0x00000073)
     return false;
 
   _info = {};
@@ -2825,17 +2836,12 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_s390x &) {
   // onto the stack.
   const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP));
   // The PC might contain an invalid address if the unwind info is bad, so
-  // directly accessing it could cause a segfault. Use pipe/write/read to
-  // read the memory safely instead.
-  int pipefd[2];
-  if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
-    return false;
-  uint16_t inst;
-  const auto bufferSize = sizeof inst;
-  if (write(pipefd[1], reinterpret_cast<void *>(pc), bufferSize) != bufferSize)
+  // directly accessing it could cause a SIGSEGV.
+  if (!isReadableAddr(static_cast<void *>(pc)) ||
+      !isReadableAddr(static_cast<void *>(pc + 2)))
     return false;
-  const auto bytesRead = read(pipefd[0], &inst, bufferSize);
-  if (bytesRead == bufferSize && (inst == 0x0a77 || inst == 0x0aad)) {
+  const auto inst = *reinterpret_cast<uint16_t *>(pc);
+  if (inst == 0x0a77 || inst == 0x0aad) {
     _info = {};
     _info.start_ip = pc;
     _info.end_ip = pc + 2;

>From 9b22940209fb555120c574738c369eb3559e805f Mon Sep 17 00:00:00 2001
From: Jordan R Abrahams-Whitehead <ajordanr at google.com>
Date: Wed, 13 Dec 2023 21:41:51 +0000
Subject: [PATCH 4/4] fixup! [libunwind] Replace process_vm_readv with pipe

---
 libunwind/src/UnwindCursor.hpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 165434dde74a7d..f5050d327f6f04 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -12,6 +12,8 @@
 #define __UNWINDCURSOR_HPP__
 
 #include "cet_unwind.h"
+#include <errno.h>
+#include <signal.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -33,7 +35,6 @@
 #if defined(_LIBUNWIND_TARGET_LINUX) &&                                        \
     (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_RISCV) || \
      defined(_LIBUNWIND_TARGET_S390X))
-#include <fcntl.h>
 #include <sys/syscall.h>
 #include <sys/uio.h>
 #include <unistd.h>
@@ -2725,7 +2726,7 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_arm64 &) {
   if (!isReadableAddr(static_cast<void *>(pc)) ||
       !isReadableAddr(static_cast<void *>(pc + 4)))
     return false;
-  auto *instructions = static_cast<uint32_t *>(pc);
+  auto *instructions = reinterpret_cast<uint32_t *>(pc);
   // Look for instructions: mov x8, #0x8b; svc #0x0
   if (instructions[0] != 0xd2801168 || instructions[1] != 0xd4000001)
     return false;
@@ -2781,7 +2782,7 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_riscv &) {
   if (!isReadableAddr(static_cast<void *>(pc)) ||
       !isReadableAddr(static_cast<void *>(pc + 4)))
     return false;
-  const auto *instructions = static_cast<uint32_t *>(pc);
+  const auto *instructions = reinterpret_cast<uint32_t *>(pc);
   // Look for the two instructions used in the sigreturn trampoline
   // __vdso_rt_sigreturn:
   //



More information about the cfe-commits mailing list