[Lldb-commits] [lldb] [lldb] Skip remote PutFile when MD5 hashes equal (PR #88812)
Anthony Ha via lldb-commits
lldb-commits at lists.llvm.org
Mon Apr 15 15:55:49 PDT 2024
https://github.com/Awfa created https://github.com/llvm/llvm-project/pull/88812
This PR adds a check within `PutFile` to exit early when both local and destination files have matching MD5 hashes. If they differ, or there is trouble getting the hashes, the regular code path to put the file is run.
Because I needed this to talk to an `lldb-server` that speaks the gdb-remote protocol, I enabled `CalculateMD5` within `Platform/gdb-server` and also found and fixed a parsing bug within it. Before this PR, the client incorrectly parsed the response packet containing the checksum; this PR should fix that. A test for the parsing behavior is included in this PR.
>From 095696411172034f80233f1722e293c1f458d67f Mon Sep 17 00:00:00 2001
From: Anthony Ha <antha at microsoft.com>
Date: Mon, 15 Apr 2024 19:34:19 +0000
Subject: [PATCH] [lldb] Skip remote PutFile when MD5 hashes equal
---
.../gdb-server/PlatformRemoteGDBServer.cpp | 9 +++++
.../gdb-server/PlatformRemoteGDBServer.h | 3 ++
.../GDBRemoteCommunicationClient.cpp | 36 +++++++++++++++++--
lldb/source/Target/Platform.cpp | 30 +++++++++++++++-
.../GDBRemoteCommunicationClientTest.cpp | 23 ++++++++++++
5 files changed, 98 insertions(+), 3 deletions(-)
diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
index 88f1ad15b6b485..d9f3e40019cf29 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
@@ -684,6 +684,15 @@ Status PlatformRemoteGDBServer::RunShellCommand(
signo_ptr, command_output, timeout);
}
+bool PlatformRemoteGDBServer::CalculateMD5(const FileSpec &file_spec,
+ uint64_t &low, uint64_t &high) {
+ if (IsConnected()) {
+ // The gdb-remote client takes (high, low); this method takes (low, high)
+ return m_gdb_client_up->CalculateMD5(file_spec, high, low);
+ }
+ return false;
+}
+
void PlatformRemoteGDBServer::CalculateTrapHandlerSymbolNames() {
m_trap_handlers.push_back(ConstString("_sigtramp"));
}
diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
index 638f7db5ef800c..d83fc386f59427 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
@@ -146,6 +146,9 @@ class PlatformRemoteGDBServer : public Platform, private UserIDResolver {
void CalculateTrapHandlerSymbolNames() override;
+ bool CalculateMD5(const FileSpec &file_spec, uint64_t &low,
+ uint64_t &high) override;
+
const lldb::UnixSignalsSP &GetRemoteUnixSignals() override;
size_t ConnectToWaitingProcesses(lldb_private::Debugger &debugger,
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index 1f6116967a4f64..8c79d5fecf12fe 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -3433,8 +3433,40 @@ bool GDBRemoteCommunicationClient::CalculateMD5(
return false;
if (response.Peek() && *response.Peek() == 'x')
return false;
- low = response.GetHexMaxU64(false, UINT64_MAX);
- high = response.GetHexMaxU64(false, UINT64_MAX);
+
+ // GDBRemoteCommunicationServerCommon::Handle_vFile_MD5 concatenates low and
+ // high hex strings. We can't use response.GetHexMaxU64 because that can't
+ // handle the concatenated hex string. What would happen is parsing the low
+ // would consume the whole response packet - which is a bug. Instead, we get
+ // the byte string for each low and high hex separately, and parse them.
+ //
+ // An alternate way to handle this is to change the server to put a
+ // delimiter between the low/high parts, and change the client to parse the
+ // delimiter. However, we choose not to do this so existing lldb-servers
+ // don't have to be patched
+
+ // Get low part
+ auto part = response.GetStringRef().substr(response.GetFilePos(),
+ sizeof(uint64_t) * 2);
+ if (part.size() != sizeof(uint64_t) * 2)
+ return false;
+ response.SetFilePos(response.GetFilePos() + part.size());
+
+ bool conversionErrored = part.getAsInteger(16, low);
+ if (conversionErrored)
+ return false;
+
+ // Get high part
+ part = response.GetStringRef().substr(response.GetFilePos(),
+ sizeof(uint64_t) * 2);
+ if (part.size() != sizeof(uint64_t) * 2)
+ return false;
+ response.SetFilePos(response.GetFilePos() + part.size());
+
+ conversionErrored = part.getAsInteger(16, high);
+ if (conversionErrored)
+ return false;
+
return true;
}
return false;
diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
index 4ce290dfbe035f..cdbafb17a5df4d 100644
--- a/lldb/source/Target/Platform.cpp
+++ b/lldb/source/Target/Platform.cpp
@@ -1184,7 +1184,7 @@ bool Platform::IsCompatibleArchitecture(const ArchSpec &arch,
Status Platform::PutFile(const FileSpec &source, const FileSpec &destination,
uint32_t uid, uint32_t gid) {
Log *log = GetLog(LLDBLog::Platform);
- LLDB_LOGF(log, "[PutFile] Using block by block transfer....\n");
+ LLDB_LOGF(log, "[PutFile] Using block by block transfer....");
auto source_open_options =
File::eOpenOptionReadOnly | File::eOpenOptionCloseOnExec;
@@ -1197,6 +1197,34 @@ Status Platform::PutFile(const FileSpec &source, const FileSpec &destination,
if (!source_file)
return Status(source_file.takeError());
Status error;
+
+ bool requires_upload = true;
+ {
+ uint64_t dest_md5_low, dest_md5_high;
+ bool success = CalculateMD5(destination, dest_md5_low, dest_md5_high);
+ if (!success) {
+ LLDB_LOGF(log, "[PutFile] couldn't get md5 sum of destination");
+ } else {
+ auto local_md5 = llvm::sys::fs::md5_contents(source.GetPath());
+ if (!local_md5) {
+ LLDB_LOGF(log, "[PutFile] couldn't get md5 sum of source");
+ } else {
+ uint64_t local_md5_high, local_md5_low;
+ std::tie(local_md5_high, local_md5_low) = local_md5->words();
+ LLDB_LOGF(log, "[PutFile] destination md5: %016" PRIx64 "%016" PRIx64,
+ dest_md5_high, dest_md5_low);
+ LLDB_LOGF(log, "[PutFile] local md5: %016" PRIx64 "%016" PRIx64,
+ local_md5_high, local_md5_low);
+ requires_upload =
+ local_md5_high != dest_md5_high || local_md5_low != dest_md5_low;
+ }
+ }
+ }
+ if (!requires_upload) {
+ LLDB_LOGF(log, "[PutFile] skipping PutFile because md5sums match");
+ return error;
+ }
+
uint32_t permissions = source_file.get()->GetPermissions(error);
if (permissions == 0)
permissions = lldb::eFilePermissionsFileDefault;
diff --git a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
index f93cf8ce679c5b..8e0b2805e292e5 100644
--- a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
+++ b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
@@ -592,3 +592,26 @@ TEST_F(GDBRemoteCommunicationClientTest, WriteMemoryTags) {
std::vector<uint8_t>{0x99}, "QMemTags:456789,0:80000000:99",
"E03", false);
}
+
+TEST_F(GDBRemoteCommunicationClientTest, CalculateMD5) {
+ FileSpec file_spec("/foo/bar", FileSpec::Style::posix);
+ uint64_t high, low;
+ std::future<bool> async_result = std::async(std::launch::async, [&] {
+ return client.CalculateMD5(file_spec, high, low);
+ });
+
+ lldb_private::StreamString stream;
+ stream.PutCString("vFile:MD5:");
+ stream.PutStringAsRawHex8(file_spec.GetPath(false));
+ HandlePacket(server, stream.GetString().str(),
+ "F,"
+ "deadbeef01020304"
+ "05060708deadbeef");
+ ASSERT_TRUE(async_result.get());
+
+ // The server emits and the client parses the low word first, then high
+ const uint64_t expected_low = 0xdeadbeef01020304;
+ const uint64_t expected_high = 0x05060708deadbeef;
+ EXPECT_EQ(expected_low, low);
+ EXPECT_EQ(expected_high, high);
+}
More information about the lldb-commits
mailing list