[Lldb-commits] [lldb] [WIP] memory find speedup+bugfix (PR #104193)

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Thu Aug 15 07:00:27 PDT 2024


https://github.com/labath updated https://github.com/llvm/llvm-project/pull/104193

>From a537a48c444e9dec3a85241d9726d6f3187a43cf Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Wed, 14 Aug 2024 19:58:27 +0200
Subject: [PATCH] [WIP] memory find speedup+bugfix

---
 lldb/source/Target/Process.cpp                | 58 ++++++-------
 .../memory/find/TestMemoryFind.py             | 10 +++
 .../API/functionalities/memory/find/main.cpp  | 82 +++++++++++++++++--
 3 files changed, 108 insertions(+), 42 deletions(-)

diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index e3c4f2ee398cc4..4e3d7651a066ec 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -114,33 +114,6 @@ class ProcessOptionValueProperties
   }
 };
 
-class ProcessMemoryIterator {
-public:
-  ProcessMemoryIterator(Process &process, lldb::addr_t base)
-      : m_process(process), m_base_addr(base) {}
-
-  bool IsValid() { return m_is_valid; }
-
-  uint8_t operator[](lldb::addr_t offset) {
-    if (!IsValid())
-      return 0;
-
-    uint8_t retval = 0;
-    Status error;
-    if (0 == m_process.ReadMemory(m_base_addr + offset, &retval, 1, error)) {
-      m_is_valid = false;
-      return 0;
-    }
-
-    return retval;
-  }
-
-private:
-  Process &m_process;
-  const lldb::addr_t m_base_addr;
-  bool m_is_valid = true;
-};
-
 static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = {
     {
         eFollowParent,
@@ -3368,20 +3341,42 @@ lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
     return LLDB_INVALID_ADDRESS;
 
   std::vector<size_t> bad_char_heuristic(256, size);
-  ProcessMemoryIterator iterator(*this, low);
-
   for (size_t idx = 0; idx < size - 1; idx++) {
     decltype(bad_char_heuristic)::size_type bcu_idx = buf[idx];
     bad_char_heuristic[bcu_idx] = size - idx - 1;
   }
-  for (size_t s = 0; s <= (region_size - size);) {
+
+  // Search through a locally buffered chunk of target memory instead of
+  // issuing a separate one-byte read for every comparison.
+  llvm::SmallVector<uint8_t, 0> mem;
+  addr_t mem_pos = low;
+  const size_t read_size = std::max<size_t>(size, 0x10000);
+
+  for (addr_t s = low; s <= (high - size);) {
+    if (s + size > mem_pos + mem.size()) {
+      // The window [s, s + size) is not fully buffered; refill starting at s.
+      mem.resize_for_overwrite(read_size);
+      Status error;
+      mem.resize(ReadMemory(s, mem.data(),
+                            std::min<addr_t>(mem.size(), high - s), error));
+      mem_pos = s;
+      if (size > mem.size()) {
+        // Too few bytes came back to hold a match at s. Skip to the end of
+        // the region that contains the first byte we failed to read.
+        MemoryRegionInfo info;
+        error = GetMemoryRegionInfo(mem_pos + mem.size(), info);
+        if (error.Fail())
+          return LLDB_INVALID_ADDRESS;
+        s = info.GetRange().GetRangeEnd();
+        continue;
+      }
+    }
     int64_t j = size - 1;
-    while (j >= 0 && buf[j] == iterator[s + j])
+    while (j >= 0 && buf[j] == mem[s + j - mem_pos])
       j--;
     if (j < 0)
-      return low + s;
-    else
-      s += bad_char_heuristic[iterator[s + size - 1]];
+      return s;
+    s += bad_char_heuristic[mem[s + size - 1 - mem_pos]];
   }
 
   return LLDB_INVALID_ADDRESS;
diff --git a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
index 09611cc808777d..a606899498b722 100644
--- a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
+++ b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
@@ -79,3 +79,13 @@ def test_memory_find(self):
             'memory find -s "nothere" `stringdata` `stringdata+10`',
             substrs=["data not found within the range."],
         )
+
+        pagesize = self.frame().FindVariable("pagesize").GetValueAsUnsigned()
+        mem_with_holes = self.frame().FindVariable("mem_with_holes").GetValueAsUnsigned()
+        matches_var = self.frame().FindVariable("matches")
+        self.assertEqual(matches_var.GetNumChildren(), 4)
+        matches = [f'data found at location: {matches_var.GetChildAtIndex(i).GetValueAsUnsigned():#x}' for i in range(4)]
+        self.expect(
+            'memory find -c 5 -s "needle" `mem_with_holes` `mem_with_holes+5*pagesize`',
+            substrs=matches + ["no more matches within the range"],
+        )
diff --git a/lldb/test/API/functionalities/memory/find/main.cpp b/lldb/test/API/functionalities/memory/find/main.cpp
index e3dcfc762ee0f9..e5525e3ca1f73f 100644
--- a/lldb/test/API/functionalities/memory/find/main.cpp
+++ b/lldb/test/API/functionalities/memory/find/main.cpp
@@ -1,9 +1,85 @@
-#include <stdio.h>
-#include <stdint.h>
-
-int main (int argc, char const *argv[])
-{
-    const char* stringdata = "hello world; I like to write text in const char pointers";
-    uint8_t bytedata[] = {0xAA,0xBB,0xCC,0xDD,0xEE,0xFF,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99};
-    return 0; // break here
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <initializer_list>
+
+#ifdef _WIN32
+#include "Windows.h"
+#include <iostream>
+#include <system_error>
+
+// Windows counterpart of POSIX getpagesize(): returns the page size reported
+// by GetSystemInfo.
+int getpagesize() {
+  SYSTEM_INFO system_info;
+  GetSystemInfo(&system_info);
+  return system_info.dwPageSize;
+}
+
+// Reserves five pages of address space and commits only pages 0, 2 and 4,
+// leaving pages 1 and 3 reserved but uncommitted. Exits on failure.
+char *allocate_memory_with_holes() {
+  int pagesize = getpagesize();
+  void *mem = VirtualAlloc(nullptr, 5 * pagesize, MEM_RESERVE, PAGE_NOACCESS);
+  if (!mem) {
+    std::cerr << std::system_category().message(GetLastError()) << std::endl;
+    exit(1);
+  }
+  char *bytes = static_cast<char *>(mem);
+  for (int page : {0, 2, 4}) {
+    if (!VirtualAlloc(bytes + page * pagesize, pagesize, MEM_COMMIT,
+                      PAGE_READWRITE)) {
+      std::cerr << std::system_category().message(GetLastError()) << std::endl;
+      exit(1);
+    }
+  }
+  return bytes;
+}
+#else
+#include "sys/mman.h"
+#include "unistd.h"
+
+// Maps five contiguous read-write pages, then unmaps pages 1 and 3 so that
+// only pages 0, 2 and 4 remain mapped. Exits on failure.
+char *allocate_memory_with_holes() {
+  int pagesize = getpagesize();
+  void *mem = mmap(nullptr, 5 * pagesize, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) {
+    perror("mmap");
+    exit(1);
+  }
+  char *bytes = static_cast<char *>(mem);
+  for (int page : {1, 3}) {
+    if (munmap(bytes + page * pagesize, pagesize) != 0) {
+      perror("munmap");
+      exit(1);
+    }
+  }
+  return bytes;
+}
+#endif
+
+int main(int argc, char const *argv[]) {
+  const char *stringdata =
+      "hello world; I like to write text in const char pointers";
+  uint8_t bytedata[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11,
+                        0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99};
+
+  char *mem_with_holes = allocate_memory_with_holes();
+  int pagesize = getpagesize();
+  // Addresses where "needle" is planted; TestMemoryFind.py reads this array
+  // to build the locations it expects `memory find` to report.
+  char *matches[] = {
+      mem_with_holes,                // Beginning of memory
+      mem_with_holes + 2 * pagesize, // After a hole
+      mem_with_holes + 2 * pagesize +
+          pagesize / 2, // Middle of a block, after an existing match.
+      mem_with_holes + 5 * pagesize - 7, // End of memory
+  };
+  for (char *m : matches)
+    strcpy(m, "needle");
+
+  return 0; // break here
 }



More information about the lldb-commits mailing list