[Lldb-commits] [lldb] [LLDB][Minidump]Update MinidumpFileBuilder to read and write in chunks (PR #129307)

Jacob Lalonde via lldb-commits lldb-commits at lists.llvm.org
Tue Mar 4 21:48:25 PST 2025


https://github.com/Jlalond updated https://github.com/llvm/llvm-project/pull/129307

>From 2f77beefb752ffdae908101d75381268203311d6 Mon Sep 17 00:00:00 2001
From: Jacob Lalonde <jalalonde at fb.com>
Date: Fri, 28 Feb 2025 11:38:35 -0800
Subject: [PATCH 1/4] Update MinidumpFileBuilder to read and write in chunks

---
 .../Minidump/MinidumpFileBuilder.cpp          | 130 ++++++++++++------
 .../ObjectFile/Minidump/MinidumpFileBuilder.h |   7 +
 2 files changed, 97 insertions(+), 40 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
index c5013ea5e3be4..e88b606f279cd 100644
--- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
@@ -969,12 +969,82 @@ Status MinidumpFileBuilder::DumpDirectories() const {
   return error;
 }
 
-static uint64_t
-GetLargestRangeSize(const std::vector<CoreFileMemoryRange> &ranges) {
-  uint64_t max_size = 0;
-  for (const auto &core_range : ranges)
-    max_size = std::max(max_size, core_range.range.size());
-  return max_size;
+Status MinidumpFileBuilder::ReadWriteMemoryInChunks(
+    const lldb_private::CoreFileMemoryRange &range, uint64_t *bytes_read) {
+  Log *log = GetLog(LLDBLog::Object);
+  lldb::addr_t addr = range.range.start();
+  lldb::addr_t size = range.range.size();
+  // First we set the byte tally to 0, so if we do exit gracefully
+  // the caller doesn't think the random garbage on the stack is a
+  // success.
+  if (bytes_read)
+    *bytes_read = 0;
+
+  uint64_t bytes_remaining = size;
+  uint64_t total_bytes_read = 0;
+  auto data_up = std::make_unique<DataBufferHeap>(
+      std::min(bytes_remaining, MAX_WRITE_CHUNK_SIZE), 0);
+  Status error;
+  while (bytes_remaining > 0) {
+    // Get the next read chunk size as the minimum of the remaining bytes and
+    // the write chunk max size.
+    const size_t bytes_to_read =
+        std::min(bytes_remaining, MAX_WRITE_CHUNK_SIZE);
+    const size_t bytes_read_for_chunk =
+        m_process_sp->ReadMemory(range.range.start() + total_bytes_read,
+                                 data_up->GetBytes(), bytes_to_read, error);
+    if (error.Fail() || bytes_read_for_chunk == 0) {
+      LLDB_LOGF(log,
+                "Failed to read memory region at: %" PRIx64
+                ". Bytes read: %zu, error: %s",
+                addr, bytes_read_for_chunk, error.AsCString());
+      // If we've only read one byte we can just give up and return
+      if (total_bytes_read == 0)
+        return Status();
+
+      // If we've read some bytes, we stop trying to read more and return
+      // this best effort attempt
+      bytes_remaining = 0;
+    } else if (bytes_read_for_chunk != bytes_to_read) {
+      LLDB_LOGF(
+          log, "Memory region at: %" PRIx64 " failed to read %" PRIx64 " bytes",
+          addr, size);
+
+      // If we've read some bytes, we stop trying to read more and return
+      // this best effort attempt
+      bytes_remaining = 0;
+    }
+
+    // Write to the minidump file with the chunk potentially flushing to disk.
+    // this is the only place we want to return a true error, so that we can
+    // fail. If we get an error writing to disk we can't easily gaurauntee
+    // that we won't corrupt the minidump.
+    error = AddData(data_up->GetBytes(), bytes_read_for_chunk);
+    if (error.Fail())
+      return error;
+
+    // This check is so we don't overflow when the error code above sets the
+    // bytes to read to 0 (the graceful exit condition).
+    if (bytes_remaining > 0)
+      bytes_remaining -= bytes_read_for_chunk;
+
+    total_bytes_read += bytes_read_for_chunk;
+    // If the caller wants a tally back of the bytes_read, update it as we
+    // write. We do this in the loop so if we encounter an error we can
+    // report the accurate total.
+    if (bytes_read)
+      *bytes_read += bytes_read_for_chunk;
+
+    // We clear the heap per loop, without checking if we
+    // read the expected bytes this is so we don't allocate
+    // more than the MAX_WRITE_CHUNK_SIZE. But we do check if
+    // this is even worth clearing before we return and
+    // destruct the heap.
+    if (bytes_remaining > 0)
+      data_up->Clear();
+  }
+
+  return error;
 }
 
 Status
@@ -987,8 +1057,6 @@ MinidumpFileBuilder::AddMemoryList_32(std::vector<CoreFileMemoryRange> &ranges,
 
   Log *log = GetLog(LLDBLog::Object);
   size_t region_index = 0;
-  auto data_up =
-      std::make_unique<DataBufferHeap>(GetLargestRangeSize(ranges), 0);
   for (const auto &core_range : ranges) {
     // Take the offset before we write.
     const offset_t offset_for_data = GetCurrentDataEndOffset();
@@ -1003,18 +1071,15 @@ MinidumpFileBuilder::AddMemoryList_32(std::vector<CoreFileMemoryRange> &ranges,
     ++region_index;
 
     progress.Increment(1, "Adding Memory Range " + core_range.Dump());
-    const size_t bytes_read =
-        m_process_sp->ReadMemory(addr, data_up->GetBytes(), size, error);
-    if (error.Fail() || bytes_read == 0) {
-      LLDB_LOGF(log, "Failed to read memory region. Bytes read: %zu, error: %s",
-                bytes_read, error.AsCString());
-      // Just skip sections with errors or zero bytes in 32b mode
+    uint64_t bytes_read;
+    error = ReadWriteMemoryInChunks(core_range, &bytes_read);
+    if (error.Fail())
+      return error;
+
+    // If we completely failed to read this range
+    // we can just omit any of the book keeping.
+    if (bytes_read == 0)
       continue;
-    } else if (bytes_read != size) {
-      LLDB_LOGF(
-          log, "Memory region at: %" PRIx64 " failed to read %" PRIx64 " bytes",
-          addr, size);
-    }
 
     MemoryDescriptor descriptor;
     descriptor.StartOfMemoryRange =
@@ -1026,11 +1091,6 @@ MinidumpFileBuilder::AddMemoryList_32(std::vector<CoreFileMemoryRange> &ranges,
     descriptors.push_back(descriptor);
     if (m_thread_by_range_end.count(end) > 0)
       m_thread_by_range_end[end].Stack = descriptor;
-
-    // Add the data to the buffer, flush as needed.
-    error = AddData(data_up->GetBytes(), bytes_read);
-    if (error.Fail())
-      return error;
   }
 
   // Add a directory that references this list
@@ -1106,8 +1166,6 @@ MinidumpFileBuilder::AddMemoryList_64(std::vector<CoreFileMemoryRange> &ranges,
 
   Log *log = GetLog(LLDBLog::Object);
   size_t region_index = 0;
-  auto data_up =
-      std::make_unique<DataBufferHeap>(GetLargestRangeSize(ranges), 0);
   for (const auto &core_range : ranges) {
     const addr_t addr = core_range.range.start();
     const addr_t size = core_range.range.size();
@@ -1120,27 +1178,19 @@ MinidumpFileBuilder::AddMemoryList_64(std::vector<CoreFileMemoryRange> &ranges,
     ++region_index;
 
     progress.Increment(1, "Adding Memory Range " + core_range.Dump());
-    const size_t bytes_read =
-        m_process_sp->ReadMemory(addr, data_up->GetBytes(), size, error);
-    if (error.Fail()) {
-      LLDB_LOGF(log, "Failed to read memory region. Bytes read: %zu, error: %s",
-                bytes_read, error.AsCString());
-      error.Clear();
+    uint64_t bytes_read;
+    error = ReadWriteMemoryInChunks(core_range, &bytes_read);
+    if (error.Fail())
+      return error;
+
+    if (bytes_read == 0) {
       cleanup_required = true;
       descriptors[region_index].DataSize = 0;
     }
     if (bytes_read != size) {
-      LLDB_LOGF(
-          log, "Memory region at: %" PRIx64 " failed to read %" PRIx64 " bytes",
-          addr, size);
       cleanup_required = true;
       descriptors[region_index].DataSize = bytes_read;
     }
-
-    // Add the data to the buffer, flush as needed.
-    error = AddData(data_up->GetBytes(), bytes_read);
-    if (error.Fail())
-      return error;
   }
 
   // Early return if there is no cleanup needed.
diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h
index 48293ee1bf5e5..b9ebb0d9a28b7 100644
--- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h
@@ -142,6 +142,13 @@ class MinidumpFileBuilder {
   lldb_private::Status AddDirectory(llvm::minidump::StreamType type,
                                     uint64_t stream_size);
   lldb::offset_t GetCurrentDataEndOffset() const;
+
+  // Read a memory region from the process and write it to the file
+  // in fixed size chunks.
+  lldb_private::Status
+  ReadWriteMemoryInChunks(const lldb_private::CoreFileMemoryRange &range,
+                          uint64_t *bytes_read);
+
   // Stores directories to fill in later
   std::vector<llvm::minidump::Directory> m_directories;
   // When we write off the threads for the first time, we need to clean them up

>From 58501dbdbbbad3438a9b4e42ea00b56d552b1806 Mon Sep 17 00:00:00 2001
From: Jacob Lalonde <jalalonde at fb.com>
Date: Fri, 28 Feb 2025 13:43:16 -0800
Subject: [PATCH 2/4] Comment cleanup on the 0 bytes read error case

---
 .../source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
index e88b606f279cd..969155f52c4b2 100644
--- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
@@ -998,7 +998,8 @@ Status MinidumpFileBuilder::ReadWriteMemoryInChunks(
                 "Failed to read memory region at: %" PRIx64
                 ". Bytes read: %zu, error: %s",
                 addr, bytes_read_for_chunk, error.AsCString());
-      // If we've only read one byte we can just give up and return
+      // If we've read nothing, and get an error or fail to read
+      // we can just give up early.
       if (total_bytes_read == 0)
         return Status();
 

>From 4d55b066ded5474d8648e0972b5668c5e474bb80 Mon Sep 17 00:00:00 2001
From: Jacob Lalonde <jalalonde at fb.com>
Date: Mon, 3 Mar 2025 09:37:52 -0800
Subject: [PATCH 3/4] Add one more overflow safety check, const some stuff

---
 .../ObjectFile/Minidump/MinidumpFileBuilder.cpp      | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
index 969155f52c4b2..35a6ad6eb4bf4 100644
--- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
@@ -972,8 +972,8 @@ Status MinidumpFileBuilder::DumpDirectories() const {
 Status MinidumpFileBuilder::ReadWriteMemoryInChunks(
     const lldb_private::CoreFileMemoryRange &range, uint64_t *bytes_read) {
   Log *log = GetLog(LLDBLog::Object);
-  lldb::addr_t addr = range.range.start();
-  lldb::addr_t size = range.range.size();
+  const lldb::addr_t addr = range.range.start();
+  const lldb::addr_t size = range.range.size();
   // First we set the byte tally to 0, so if we do exit gracefully
   // the caller doesn't think the random garbage on the stack is a
   // success.
@@ -1024,7 +1024,13 @@ Status MinidumpFileBuilder::ReadWriteMemoryInChunks(
     if (error.Fail())
       return error;
 
-    // This check is so we don't overflow when the error code above sets the
+    // If the bytes read in this chunk would cause us to overflow, set the
+    // remaining bytes to 0 so we exit. This is a safety check so we don't
+    // get stuck building a bigger file forever.
+    if (bytes_read_for_chunk > bytes_remaining)
+      bytes_remaining = 0;
+
+    // This check is so we don't overflow when the error above sets the
     // bytes to read to 0 (the graceful exit condition).
     if (bytes_remaining > 0)
       bytes_remaining -= bytes_read_for_chunk;

>From f73943baf1ea30980dccac926b10b30f694d9b55 Mon Sep 17 00:00:00 2001
From: Jacob Lalonde <jalalonde at fb.com>
Date: Tue, 4 Mar 2025 21:48:13 -0800
Subject: [PATCH 4/4] Discussed with Jeffrey offline, fix clear causing buffer
 overflow. Move data_up memory allocation back to the add memory functions and
 more intelligently allocate the buffer

---
 .../Minidump/MinidumpFileBuilder.cpp          | 29 +++++++++++--------
 .../ObjectFile/Minidump/MinidumpFileBuilder.h |  6 ++--
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
index 35a6ad6eb4bf4..d6a0a7a6369b5 100644
--- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
@@ -970,7 +970,10 @@ Status MinidumpFileBuilder::DumpDirectories() const {
 }
 
 Status MinidumpFileBuilder::ReadWriteMemoryInChunks(
+    const std::unique_ptr<lldb_private::DataBufferHeap> &data_up,
     const lldb_private::CoreFileMemoryRange &range, uint64_t *bytes_read) {
+  if (!data_up)
+    return Status::FromErrorString("No buffer supplied to read memory.");
   Log *log = GetLog(LLDBLog::Object);
   const lldb::addr_t addr = range.range.start();
   const lldb::addr_t size = range.range.size();
@@ -982,8 +985,6 @@ Status MinidumpFileBuilder::ReadWriteMemoryInChunks(
 
   uint64_t bytes_remaining = size;
   uint64_t total_bytes_read = 0;
-  auto data_up = std::make_unique<DataBufferHeap>(
-      std::min(bytes_remaining, MAX_WRITE_CHUNK_SIZE), 0);
   Status error;
   while (bytes_remaining > 0) {
     // Get the next read chunk size as the minimum of the remaining bytes and
@@ -1041,19 +1042,19 @@ Status MinidumpFileBuilder::ReadWriteMemoryInChunks(
     // report the accurate total.
     if (bytes_read)
       *bytes_read += bytes_read_for_chunk;
-
-    // We clear the heap per loop, without checking if we
-    // read the expected bytes this is so we don't allocate
-    // more than the MAX_WRITE_CHUNK_SIZE. But we do check if
-    // this is even worth clearing before we return and
-    // destruct the heap.
-    if (bytes_remaining > 0)
-      data_up->Clear();
   }
 
   return error;
 }
 
+static uint64_t
+GetLargestRangeSize(const std::vector<CoreFileMemoryRange> &ranges) {
+  uint64_t max_size = 0;
+  for (const auto &core_range : ranges)
+    max_size = std::max(max_size, core_range.range.size());
+  return max_size;
+}
+
 Status
 MinidumpFileBuilder::AddMemoryList_32(std::vector<CoreFileMemoryRange> &ranges,
                                       Progress &progress) {
@@ -1064,6 +1065,8 @@ MinidumpFileBuilder::AddMemoryList_32(std::vector<CoreFileMemoryRange> &ranges,
 
   Log *log = GetLog(LLDBLog::Object);
   size_t region_index = 0;
+  auto data_up = std::make_unique<DataBufferHeap>(
+      std::min(GetLargestRangeSize(ranges), MAX_WRITE_CHUNK_SIZE), 0);
   for (const auto &core_range : ranges) {
     // Take the offset before we write.
     const offset_t offset_for_data = GetCurrentDataEndOffset();
@@ -1079,7 +1082,7 @@ MinidumpFileBuilder::AddMemoryList_32(std::vector<CoreFileMemoryRange> &ranges,
 
     progress.Increment(1, "Adding Memory Range " + core_range.Dump());
     uint64_t bytes_read;
-    error = ReadWriteMemoryInChunks(core_range, &bytes_read);
+    error = ReadWriteMemoryInChunks(data_up, core_range, &bytes_read);
     if (error.Fail())
       return error;
 
@@ -1155,6 +1158,8 @@ MinidumpFileBuilder::AddMemoryList_64(std::vector<CoreFileMemoryRange> &ranges,
   list_header.BaseRVA = memory_ranges_base_rva;
   m_data.AppendData(&list_header, sizeof(llvm::minidump::Memory64ListHeader));
 
+  auto data_up = std::make_unique<DataBufferHeap>(
+      std::min(GetLargestRangeSize(ranges), MAX_WRITE_CHUNK_SIZE), 0);
   bool cleanup_required = false;
   std::vector<MemoryDescriptor_64> descriptors;
   // Enumerate the ranges and create the memory descriptors so we can append
@@ -1186,7 +1191,7 @@ MinidumpFileBuilder::AddMemoryList_64(std::vector<CoreFileMemoryRange> &ranges,
 
     progress.Increment(1, "Adding Memory Range " + core_range.Dump());
     uint64_t bytes_read;
-    error = ReadWriteMemoryInChunks(core_range, &bytes_read);
+    error = ReadWriteMemoryInChunks(data_up, core_range, &bytes_read);
     if (error.Fail())
       return error;
 
diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h
index b9ebb0d9a28b7..7840d68797245 100644
--- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h
@@ -145,9 +145,9 @@ class MinidumpFileBuilder {
 
   // Read a memory region from the process and write it to the file
   // in fixed size chunks.
-  lldb_private::Status
-  ReadWriteMemoryInChunks(const lldb_private::CoreFileMemoryRange &range,
-                          uint64_t *bytes_read);
+  lldb_private::Status ReadWriteMemoryInChunks(
+      const std::unique_ptr<lldb_private::DataBufferHeap> &data_up,
+      const lldb_private::CoreFileMemoryRange &range, uint64_t *bytes_read);
 
   // Stores directories to fill in later
   std::vector<llvm::minidump::Directory> m_directories;



More information about the lldb-commits mailing list