[compiler-rt] acfeb1a - [compiler-rt] Avoid truncating Symbolizer output

Paul Kirth via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 7 14:50:48 PDT 2022


Author: Paul Kirth
Date: 2022-06-07T21:50:39Z
New Revision: acfeb1a6c2446cb7e1ed6a4dcb298431b0d2b15c

URL: https://github.com/llvm/llvm-project/commit/acfeb1a6c2446cb7e1ed6a4dcb298431b0d2b15c
DIFF: https://github.com/llvm/llvm-project/commit/acfeb1a6c2446cb7e1ed6a4dcb298431b0d2b15c.diff

LOG: [compiler-rt] Avoid truncating Symbolizer output

Repalce the fixed buffer in SymbolizerProcess with InternalScopedString,
and simply append to it when reading data.

Fixes #55460

Reviewed By: vitalybuka, leonardchan

Differential Revision: https://reviews.llvm.org/D126580

Added: 
    

Modified: 
    compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
    compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
    compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
    compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
index df122ed3425c3..29a08386d0b9f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -90,9 +90,10 @@ class SymbolizerProcess {
 
   // Customizable by subclasses.
   virtual bool StartSymbolizerSubprocess();
-  virtual bool ReadFromSymbolizer(char *buffer, uptr max_length);
+  virtual bool ReadFromSymbolizer();
   // Return the environment to run the symbolizer in.
   virtual char **GetEnvP() { return GetEnviron(); }
+  InternalMmapVector<char> &GetBuff() { return buffer_; }
 
  private:
   virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
@@ -113,8 +114,7 @@ class SymbolizerProcess {
   fd_t input_fd_;
   fd_t output_fd_;
 
-  static const uptr kBufferSize = 16 * 1024;
-  char buffer_[kBufferSize];
+  InternalMmapVector<char> buffer_;
 
   static const uptr kMaxTimesRestarted = 5;
   static const int kSymbolizerStartupTimeMillis = 10;

diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index 9177990418566..16cb65e1aac96 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -500,9 +500,9 @@ const char *SymbolizerProcess::SendCommandImpl(const char *command) {
       return nullptr;
   if (!WriteToSymbolizer(command, internal_strlen(command)))
       return nullptr;
-  if (!ReadFromSymbolizer(buffer_, kBufferSize))
-      return nullptr;
-  return buffer_;
+  if (!ReadFromSymbolizer())
+    return nullptr;
+  return buffer_.data();
 }
 
 bool SymbolizerProcess::Restart() {
@@ -513,31 +513,33 @@ bool SymbolizerProcess::Restart() {
   return StartSymbolizerSubprocess();
 }
 
-bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) {
-  if (max_length == 0)
-    return true;
-  uptr read_len = 0;
-  while (true) {
+bool SymbolizerProcess::ReadFromSymbolizer() {
+  buffer_.clear();
+  constexpr uptr max_length = 1024;
+  bool ret = true;
+  do {
     uptr just_read = 0;
-    bool success = ReadFromFile(input_fd_, buffer + read_len,
-                                max_length - read_len - 1, &just_read);
+    uptr size_before = buffer_.size();
+    buffer_.resize(size_before + max_length);
+    buffer_.resize(buffer_.capacity());
+    bool ret = ReadFromFile(input_fd_, &buffer_[size_before],
+                            buffer_.size() - size_before, &just_read);
+
+    if (!ret)
+      just_read = 0;
+
+    buffer_.resize(size_before + just_read);
+
     // We can't read 0 bytes, as we don't expect external symbolizer to close
     // its stdout.
-    if (!success || just_read == 0) {
+    if (just_read == 0) {
       Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
-      return false;
-    }
-    read_len += just_read;
-    if (ReachedEndOfOutput(buffer, read_len))
-      break;
-    if (read_len + 1 == max_length) {
-      Report("WARNING: Symbolizer buffer too small\n");
-      read_len = 0;
+      ret = false;
       break;
     }
-  }
-  buffer[read_len] = '\0';
-  return true;
+  } while (!ReachedEndOfOutput(buffer_.data(), buffer_.size()));
+  buffer_.push_back('\0');
+  return ret;
 }
 
 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {

diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
index 6f18d7cbd7470..8be7709b6038b 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
@@ -225,24 +225,24 @@ class Addr2LineProcess final : public SymbolizerProcess {
 
   bool ReachedEndOfOutput(const char *buffer, uptr length) const override;
 
-  bool ReadFromSymbolizer(char *buffer, uptr max_length) override {
-    if (!SymbolizerProcess::ReadFromSymbolizer(buffer, max_length))
+  bool ReadFromSymbolizer() override {
+    if (!SymbolizerProcess::ReadFromSymbolizer())
       return false;
-    // The returned buffer is empty when output is valid, but exceeds
-    // max_length.
-    if (*buffer == '\0')
-      return true;
+    auto &buff = GetBuff();
     // We should cut out output_terminator_ at the end of given buffer,
     // appended by addr2line to mark the end of its meaningful output.
     // We cannot scan buffer from it's beginning, because it is legal for it
     // to start with output_terminator_ in case given offset is invalid. So,
     // scanning from second character.
-    char *garbage = internal_strstr(buffer + 1, output_terminator_);
+    char *garbage = internal_strstr(buff.data() + 1, output_terminator_);
     // This should never be NULL since buffer must end up with
     // output_terminator_.
     CHECK(garbage);
+
     // Trim the buffer.
-    garbage[0] = '\0';
+    uintptr_t new_size = garbage - buff.data();
+    GetBuff().resize(new_size);
+    GetBuff().push_back('\0');
     return true;
   }
 

diff  --git a/compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp b/compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp
index 37b7e98339eb6..96e874e5bad40 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/symbolize_stack.cpp
@@ -5,11 +5,6 @@
 // On Darwin LSan reports a false positive
 // XFAIL: darwin && lsan
 
-// FIXME: https://github.com/llvm/llvm-project/issues/55460
-// On Linux its possible for symbolizer output to be truncated and to match the
-// check below. Remove when the underlying problem has been addressed.
-// UNSUPPORTED: linux
-
 #include <sanitizer/common_interface_defs.h>
 #include <vector>
 
@@ -31,6 +26,6 @@ __attribute__((noinline)) void A<0>::RecursiveTemplateFunction(const T &) {
 }
 
 int main() {
-  // CHECK: {{vector<.*vector<.*vector<.*vector<.*vector<}}
+  // CHECK: {{#[0-9]+.*A<0>.*vector<.*vector<.*vector<.*vector<.*vector<.*vector<.*vector<.*vector<.*vector<.*vector<.*vector<.*vector.*symbolize_stack.cpp:25}}
   A<10>().RecursiveTemplateFunction(0);
 }


        


More information about the llvm-commits mailing list