[Lldb-commits] [lldb] [lldb] Fix and speedup the `memory find` command (PR #104193)
Pavel Labath via lldb-commits
lldb-commits at lists.llvm.org
Thu Aug 29 02:29:17 PDT 2024
https://github.com/labath updated https://github.com/llvm/llvm-project/pull/104193
>From 7b8b8b699902d2365ea43e5ae015546c4d20fac8 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Wed, 14 Aug 2024 19:58:27 +0200
Subject: [PATCH 1/5] [lldb] Fix and speedup the `memory find` command
This patch fixes an issue where the `memory find` command would
effectively stop searching after encountering a memory read error (which
could happen due to unreadable memory), without giving any indication
that it has done so (it would just print it could not find the pattern).
To make matters worse, it would not terminate after encountering this
error, but rather proceed to slowly increment the address pointer, which
meant that searching a large region could take a very long time (and
give the appearance that lldb is actually searching for the thing).
The patch fixes this first problem (*) by detecting read errors and
skipping over (using GetMemoryRegionInfo) the unreadable parts of memory
and resuming the search after them. It also reads the memory in bulk (up
to 1MB), which speeds up the search significantly (up to 6x for live
processes, 18x for core files).
(*) The fix does not work on windows yet, because the ReadMemory API
does not return partial results (like it does for other systems). I'm
preparing a separate patch to deal with that.
---
lldb/source/Target/Process.cpp | 68 +++++++--------
.../memory/find/TestMemoryFind.py | 30 ++++++-
.../API/functionalities/memory/find/main.cpp | 83 +++++++++++++++++--
3 files changed, 136 insertions(+), 45 deletions(-)
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index b2a0f13b9a1549..5d066d264bd3f5 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -114,33 +114,6 @@ class ProcessOptionValueProperties
}
};
-class ProcessMemoryIterator {
-public:
- ProcessMemoryIterator(Process &process, lldb::addr_t base)
- : m_process(process), m_base_addr(base) {}
-
- bool IsValid() { return m_is_valid; }
-
- uint8_t operator[](lldb::addr_t offset) {
- if (!IsValid())
- return 0;
-
- uint8_t retval = 0;
- Status error;
- if (0 == m_process.ReadMemory(m_base_addr + offset, &retval, 1, error)) {
- m_is_valid = false;
- return 0;
- }
-
- return retval;
- }
-
-private:
- Process &m_process;
- const lldb::addr_t m_base_addr;
- bool m_is_valid = true;
-};
-
static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = {
{
eFollowParent,
@@ -3367,21 +3340,48 @@ lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
if (region_size < size)
return LLDB_INVALID_ADDRESS;
+ // See "Boyer-Moore string search algorithm".
std::vector<size_t> bad_char_heuristic(256, size);
- ProcessMemoryIterator iterator(*this, low);
-
for (size_t idx = 0; idx < size - 1; idx++) {
decltype(bad_char_heuristic)::size_type bcu_idx = buf[idx];
bad_char_heuristic[bcu_idx] = size - idx - 1;
}
- for (size_t s = 0; s <= (region_size - size);) {
+
+ // Memory we're currently searching through.
+ llvm::SmallVector<uint8_t, 0> mem;
+ // Position of the memory buffer.
+ addr_t mem_pos = low;
+ // Maximum number of bytes read (and buffered). We need to read at least
+ // `size` bytes for a successful match.
+ const size_t max_read_size = std::max<size_t>(size, 0x10000);
+
+ for (addr_t s = low; s <= (high - size);) {
+ if (s + size > mem.size() + mem_pos) {
+ // We need to read more data. We don't attempt to reuse the data we've
+ // already read (up to `size-1` bytes from `s` to `mem_pos+mem.size()`).
+ // This is fine for patterns much smaller than max_read_size. For very
+ // long patterns we may need to do something more elaborate.
+ mem.resize_for_overwrite(max_read_size);
+ Status error;
+ mem.resize(
+ ReadMemory(s, mem.data(), std::min(mem.size(), high - s), error));
+ mem_pos = s;
+ if (error.Fail() || size > mem.size()) {
+ // We didn't read enough data. Skip to the next memory region.
+ MemoryRegionInfo info;
+ error = GetMemoryRegionInfo(mem_pos + mem.size(), info);
+ if (error.Fail())
+ break;
+ s = info.GetRange().GetRangeEnd();
+ continue;
+ }
+ }
int64_t j = size - 1;
- while (j >= 0 && buf[j] == iterator[s + j])
+ while (j >= 0 && buf[j] == mem[s + j - mem_pos])
j--;
if (j < 0)
- return low + s;
- else
- s += bad_char_heuristic[iterator[s + size - 1]];
+ return s; // We have a match.
+ s += bad_char_heuristic[mem[s + size - 1 - mem_pos]];
}
return LLDB_INVALID_ADDRESS;
diff --git a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
index 09611cc808777d..72acfb3d600701 100644
--- a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
+++ b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
@@ -10,14 +10,16 @@
class MemoryFindTestCase(TestBase):
+
+ NO_DEBUG_INFO_TESTCASE = True
+
def setUp(self):
# Call super's setUp().
TestBase.setUp(self)
# Find the line number to break inside main().
self.line = line_number("main.cpp", "// break here")
- def test_memory_find(self):
- """Test the 'memory find' command."""
+ def _prepare_inferior(self):
self.build()
exe = self.getBuildArtifact("a.out")
self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
@@ -39,7 +41,10 @@ def test_memory_find(self):
# The breakpoint should have a hit count of 1.
lldbutil.check_breakpoint(self, bpno=1, expected_hit_count=1)
- # Test the memory find commands.
+ def test_memory_find(self):
+ """Test the 'memory find' command."""
+
+ self._prepare_inferior()
# Empty search string should be handled.
self.expect(
@@ -79,3 +84,22 @@ def test_memory_find(self):
'memory find -s "nothere" `stringdata` `stringdata+10`',
substrs=["data not found within the range."],
)
+
+ @expectedFailureWindows
+ def test_memory_find_with_holes(self):
+ self._prepare_inferior()
+
+ pagesize = self.frame().FindVariable("pagesize").GetValueAsUnsigned()
+ mem_with_holes = (
+ self.frame().FindVariable("mem_with_holes").GetValueAsUnsigned()
+ )
+ matches_var = self.frame().FindVariable("matches")
+ self.assertEqual(matches_var.GetNumChildren(), 4)
+ matches = [
+ f"data found at location: {matches_var.GetChildAtIndex(i).GetValueAsUnsigned():#x}"
+ for i in range(4)
+ ]
+ self.expect(
+ 'memory find -c 5 -s "needle" `mem_with_holes` `mem_with_holes+5*pagesize`',
+ substrs=matches + ["no more matches within the range"],
+ )
diff --git a/lldb/test/API/functionalities/memory/find/main.cpp b/lldb/test/API/functionalities/memory/find/main.cpp
index e3dcfc762ee0f9..e752c583d70f61 100644
--- a/lldb/test/API/functionalities/memory/find/main.cpp
+++ b/lldb/test/API/functionalities/memory/find/main.cpp
@@ -1,9 +1,76 @@
-#include <stdio.h>
-#include <stdint.h>
-
-int main (int argc, char const *argv[])
-{
- const char* stringdata = "hello world; I like to write text in const char pointers";
- uint8_t bytedata[] = {0xAA,0xBB,0xCC,0xDD,0xEE,0xFF,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99};
- return 0; // break here
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <initializer_list>
+#include <iostream>
+
+#ifdef _WIN32
+#include "Windows.h"
+
+int getpagesize() {
+ SYSTEM_INFO system_info;
+ GetSystemInfo(&system_info);
+ return system_info.dwPageSize;
+}
+
+char *allocate_memory_with_holes() {
+ int pagesize = getpagesize();
+ void *mem = VirtualAlloc(nullptr, 5 * pagesize, MEM_RESERVE, PAGE_NOACCESS);
+ if (!mem) {
+ std::cerr << std::system_category().message(GetLastError()) << std::endl;
+ exit(1);
+ }
+ char *bytes = static_cast<char *>(mem);
+ for (int page : {0, 2, 4}) {
+ if (!VirtualAlloc(bytes + page * pagesize, pagesize, MEM_COMMIT,
+ PAGE_READWRITE)) {
+ std::cerr << std::system_category().message(GetLastError()) << std::endl;
+ exit(1);
+ }
+ }
+ return bytes;
+}
+#else
+#include "sys/mman.h"
+#include "unistd.h"
+
+char *allocate_memory_with_holes() {
+ int pagesize = getpagesize();
+ void *mem = mmap(nullptr, 5 * pagesize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mem == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ char *bytes = static_cast<char *>(mem);
+ for (int page : {1, 3}) {
+ if (munmap(bytes + page * pagesize, pagesize) != 0) {
+ perror("munmap");
+ exit(1);
+ }
+ }
+ return bytes;
+}
+#endif
+
+int main(int argc, char const *argv[]) {
+ const char *stringdata =
+ "hello world; I like to write text in const char pointers";
+ uint8_t bytedata[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11,
+ 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99};
+
+ char *mem_with_holes = allocate_memory_with_holes();
+ int pagesize = getpagesize();
+ char *matches[] = {
+ mem_with_holes, // Beginning of memory
+ mem_with_holes + 2 * pagesize, // After a hole
+ mem_with_holes + 2 * pagesize +
+ pagesize / 2, // Middle of a block, after an existing match.
+ mem_with_holes + 5 * pagesize - 7, // End of memory
+ };
+ for (char *m : matches)
+ strcpy(m, "needle");
+
+ return 0; // break here
}
>From 369ea493e3268dca52dc8bf3cf19c75f927ed77b Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Tue, 27 Aug 2024 17:16:22 +0200
Subject: [PATCH 2/5] make darker happy
---
lldb/test/API/functionalities/memory/find/TestMemoryFind.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
index 72acfb3d600701..785790e0ed8eae 100644
--- a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
+++ b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
@@ -10,7 +10,6 @@
class MemoryFindTestCase(TestBase):
-
NO_DEBUG_INFO_TESTCASE = True
def setUp(self):
>From 0c5dfb88874b8c87916c79e480e8af8c8b6e173f Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Wed, 28 Aug 2024 16:50:34 +0200
Subject: [PATCH 3/5] add logs
---
lldb/test/API/functionalities/memory/find/TestMemoryFind.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
index 785790e0ed8eae..2e52e4528e8f08 100644
--- a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
+++ b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
@@ -88,6 +88,9 @@ def test_memory_find(self):
def test_memory_find_with_holes(self):
self._prepare_inferior()
+ self.runCmd("log enable gdb-remote packets")
+ self.runCmd("log enable lldb process target")
+
pagesize = self.frame().FindVariable("pagesize").GetValueAsUnsigned()
mem_with_holes = (
self.frame().FindVariable("mem_with_holes").GetValueAsUnsigned()
>From 031cc537b24ea5509ff8efc9c39f1897c6a35a77 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Wed, 28 Aug 2024 17:36:04 +0200
Subject: [PATCH 4/5] feedback David
---
lldb/source/Target/Process.cpp | 23 ++++++++++---------
.../memory/find/TestMemoryFind.py | 2 +-
.../API/functionalities/memory/find/main.cpp | 3 +++
3 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 5d066d264bd3f5..2c1fd7c91c7c3b 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -3355,33 +3355,34 @@ lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
// `size` bytes for a successful match.
const size_t max_read_size = std::max<size_t>(size, 0x10000);
- for (addr_t s = low; s <= (high - size);) {
- if (s + size > mem.size() + mem_pos) {
+ for (addr_t cur_addr = low; cur_addr <= (high - size);) {
+ if (cur_addr + size > mem_pos + mem.size()) {
// We need to read more data. We don't attempt to reuse the data we've
- // already read (up to `size-1` bytes from `s` to `mem_pos+mem.size()`).
- // This is fine for patterns much smaller than max_read_size. For very
+ // already read (up to `size-1` bytes from `cur_addr` to
+ // `mem_pos+mem.size()`). This is fine for patterns much smaller than
+ // max_read_size. For very
// long patterns we may need to do something more elaborate.
mem.resize_for_overwrite(max_read_size);
Status error;
- mem.resize(
- ReadMemory(s, mem.data(), std::min(mem.size(), high - s), error));
- mem_pos = s;
+ mem.resize(ReadMemory(cur_addr, mem.data(),
+ std::min(mem.size(), high - cur_addr), error));
+ mem_pos = cur_addr;
if (error.Fail() || size > mem.size()) {
// We didn't read enough data. Skip to the next memory region.
MemoryRegionInfo info;
error = GetMemoryRegionInfo(mem_pos + mem.size(), info);
if (error.Fail())
break;
- s = info.GetRange().GetRangeEnd();
+ cur_addr = info.GetRange().GetRangeEnd();
continue;
}
}
int64_t j = size - 1;
- while (j >= 0 && buf[j] == mem[s + j - mem_pos])
+ while (j >= 0 && buf[j] == mem[cur_addr + j - mem_pos])
j--;
if (j < 0)
- return s; // We have a match.
- s += bad_char_heuristic[mem[s + size - 1 - mem_pos]];
+ return cur_addr; // We have a match.
+ cur_addr += bad_char_heuristic[mem[cur_addr + size - 1 - mem_pos]];
}
return LLDB_INVALID_ADDRESS;
diff --git a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
index 2e52e4528e8f08..32b6121f01dac3 100644
--- a/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
+++ b/lldb/test/API/functionalities/memory/find/TestMemoryFind.py
@@ -102,6 +102,6 @@ def test_memory_find_with_holes(self):
for i in range(4)
]
self.expect(
- 'memory find -c 5 -s "needle" `mem_with_holes` `mem_with_holes+5*pagesize`',
+ 'memory find --count 5 --string "needle" `mem_with_holes` `mem_with_holes+5*pagesize`',
substrs=matches + ["no more matches within the range"],
)
diff --git a/lldb/test/API/functionalities/memory/find/main.cpp b/lldb/test/API/functionalities/memory/find/main.cpp
index e752c583d70f61..9010e7b37d6b87 100644
--- a/lldb/test/API/functionalities/memory/find/main.cpp
+++ b/lldb/test/API/functionalities/memory/find/main.cpp
@@ -5,6 +5,9 @@
#include <initializer_list>
#include <iostream>
+// allocate_memory_with_holes returns a pointer to five pages of memory, where
+// the second and fourth page in the range are inaccessible (even to debugging
+// APIs). We use this to test lldb's ability to skip over in accessible blocks.
#ifdef _WIN32
#include "Windows.h"
>From 3fc95ce499d9e5d5691158fe0847afa8ec338106 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Thu, 29 Aug 2024 11:24:58 +0200
Subject: [PATCH 5/5] darwin fixes
Don't check for failure status, but rely on bytes_read. My expectations about
the api were incorrect as it can return an error even when it has successfully
read some bytes.
Also, make the code build on macos (different definition of size_t).
---
lldb/source/Target/Process.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 2c1fd7c91c7c3b..d96771acdcbfc5 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -3365,9 +3365,9 @@ lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
mem.resize_for_overwrite(max_read_size);
Status error;
mem.resize(ReadMemory(cur_addr, mem.data(),
- std::min(mem.size(), high - cur_addr), error));
+ std::min<size_t>(mem.size(), high - cur_addr), error));
mem_pos = cur_addr;
- if (error.Fail() || size > mem.size()) {
+ if (size > mem.size()) {
// We didn't read enough data. Skip to the next memory region.
MemoryRegionInfo info;
error = GetMemoryRegionInfo(mem_pos + mem.size(), info);
More information about the lldb-commits
mailing list