[Lldb-commits] [lldb] [lldb] Skip remote PutFile when MD5 hashes equal (PR #88812)

Anthony Ha via lldb-commits lldb-commits at lists.llvm.org
Mon Apr 15 21:57:45 PDT 2024


https://github.com/Awfa updated https://github.com/llvm/llvm-project/pull/88812

>From 095696411172034f80233f1722e293c1f458d67f Mon Sep 17 00:00:00 2001
From: Anthony Ha <antha at microsoft.com>
Date: Mon, 15 Apr 2024 19:34:19 +0000
Subject: [PATCH 1/2] [lldb] Skip remote PutFile when MD5 hashes equal

---
 .../gdb-server/PlatformRemoteGDBServer.cpp    |  9 +++++
 .../gdb-server/PlatformRemoteGDBServer.h      |  3 ++
 .../GDBRemoteCommunicationClient.cpp          | 36 +++++++++++++++++--
 lldb/source/Target/Platform.cpp               | 30 +++++++++++++++-
 .../GDBRemoteCommunicationClientTest.cpp      | 23 ++++++++++++
 5 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
index 88f1ad15b6b485..d9f3e40019cf29 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
@@ -684,6 +684,15 @@ Status PlatformRemoteGDBServer::RunShellCommand(
                                           signo_ptr, command_output, timeout);
 }
 
+bool PlatformRemoteGDBServer::CalculateMD5(const FileSpec &file_spec,
+                                           uint64_t &low, uint64_t &high) {
+  if (IsConnected()) {
+    // Note that high/low are switched in the gdb remote communication client
+    return m_gdb_client_up->CalculateMD5(file_spec, high, low);
+  }
+  return false;
+}
+
 void PlatformRemoteGDBServer::CalculateTrapHandlerSymbolNames() {
   m_trap_handlers.push_back(ConstString("_sigtramp"));
 }
diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
index 638f7db5ef800c..d83fc386f59427 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
@@ -146,6 +146,9 @@ class PlatformRemoteGDBServer : public Platform, private UserIDResolver {
 
   void CalculateTrapHandlerSymbolNames() override;
 
+  bool CalculateMD5(const FileSpec &file_spec, uint64_t &low,
+                    uint64_t &high) override;
+
   const lldb::UnixSignalsSP &GetRemoteUnixSignals() override;
 
   size_t ConnectToWaitingProcesses(lldb_private::Debugger &debugger,
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index 1f6116967a4f64..8c79d5fecf12fe 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -3433,8 +3433,40 @@ bool GDBRemoteCommunicationClient::CalculateMD5(
       return false;
     if (response.Peek() && *response.Peek() == 'x')
       return false;
-    low = response.GetHexMaxU64(false, UINT64_MAX);
-    high = response.GetHexMaxU64(false, UINT64_MAX);
+
+    // GDBRemoteCommunicationServerCommon::Handle_vFile_MD5 concatenates low and
+    // high hex strings. We can't use response.GetHexMaxU64 because that can't
+    // handle the concatenated hex string. What would happen is parsing the low
+    // would consume the whole response packet - which is a bug. Instead, we get
+    // the byte string for each low and high hex separately, and parse them.
+    //
+    // An alternate way to handle this is to change the server to put a
+    // delimiter between the low/high parts, and change the client to parse the
+    // delimiter. However, we choose not to do this so existing lldb-servers
+    // don't have to be patched
+
+    // Get low part
+    auto part = response.GetStringRef().substr(response.GetFilePos(),
+                                               sizeof(uint64_t) * 2);
+    if (part.size() != sizeof(uint64_t) * 2)
+      return false;
+    response.SetFilePos(response.GetFilePos() + part.size());
+
+    bool conversionErrored = part.getAsInteger(16, low);
+    if (conversionErrored)
+      return false;
+
+    // Get high part
+    part = response.GetStringRef().substr(response.GetFilePos(),
+                                          sizeof(uint64_t) * 2);
+    if (part.size() != sizeof(uint64_t) * 2)
+      return false;
+    response.SetFilePos(response.GetFilePos() + part.size());
+
+    conversionErrored = part.getAsInteger(16, high);
+    if (conversionErrored)
+      return false;
+
     return true;
   }
   return false;
diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
index 4ce290dfbe035f..cdbafb17a5df4d 100644
--- a/lldb/source/Target/Platform.cpp
+++ b/lldb/source/Target/Platform.cpp
@@ -1184,7 +1184,7 @@ bool Platform::IsCompatibleArchitecture(const ArchSpec &arch,
 Status Platform::PutFile(const FileSpec &source, const FileSpec &destination,
                          uint32_t uid, uint32_t gid) {
   Log *log = GetLog(LLDBLog::Platform);
-  LLDB_LOGF(log, "[PutFile] Using block by block transfer....\n");
+  LLDB_LOGF(log, "[PutFile] Using block by block transfer....");
 
   auto source_open_options =
       File::eOpenOptionReadOnly | File::eOpenOptionCloseOnExec;
@@ -1197,6 +1197,34 @@ Status Platform::PutFile(const FileSpec &source, const FileSpec &destination,
   if (!source_file)
     return Status(source_file.takeError());
   Status error;
+
+  bool requires_upload = true;
+  {
+    uint64_t dest_md5_low, dest_md5_high;
+    bool success = CalculateMD5(destination, dest_md5_low, dest_md5_high);
+    if (!success) {
+      LLDB_LOGF(log, "[PutFile] couldn't get md5 sum of destination");
+    } else {
+      auto local_md5 = llvm::sys::fs::md5_contents(source.GetPath());
+      if (!local_md5) {
+        LLDB_LOGF(log, "[PutFile] couldn't get md5 sum of source");
+      } else {
+        uint64_t local_md5_high, local_md5_low;
+        std::tie(local_md5_high, local_md5_low) = local_md5->words();
+        LLDB_LOGF(log, "[PutFile] destination md5: %016" PRIx64 "%016" PRIx64,
+                  dest_md5_high, dest_md5_low);
+        LLDB_LOGF(log, "[PutFile]       local md5: %016" PRIx64 "%016" PRIx64,
+                  local_md5_high, local_md5_low);
+        requires_upload =
+            local_md5_high != dest_md5_high || local_md5_low != dest_md5_low;
+      }
+    }
+  }
+  if (!requires_upload) {
+    LLDB_LOGF(log, "[PutFile] skipping PutFile because md5sums match");
+    return error;
+  }
+
   uint32_t permissions = source_file.get()->GetPermissions(error);
   if (permissions == 0)
     permissions = lldb::eFilePermissionsFileDefault;
diff --git a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
index f93cf8ce679c5b..8e0b2805e292e5 100644
--- a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
+++ b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
@@ -592,3 +592,26 @@ TEST_F(GDBRemoteCommunicationClientTest, WriteMemoryTags) {
                  std::vector<uint8_t>{0x99}, "QMemTags:456789,0:80000000:99",
                  "E03", false);
 }
+
+TEST_F(GDBRemoteCommunicationClientTest, CalculateMD5) {
+  FileSpec file_spec("/foo/bar", FileSpec::Style::posix);
+  uint64_t high, low;
+  std::future<bool> async_result = std::async(std::launch::async, [&] {
+    return client.CalculateMD5(file_spec, high, low);
+  });
+
+  lldb_private::StreamString stream;
+  stream.PutCString("vFile:MD5:");
+  stream.PutStringAsRawHex8(file_spec.GetPath(false));
+  HandlePacket(server, stream.GetString().str(),
+               "F,"
+               "deadbeef01020304"
+               "05060708deadbeef");
+  ASSERT_TRUE(async_result.get());
+
+  // Server and client puts/parses low, and then high
+  const uint64_t expected_low = 0xdeadbeef01020304;
+  const uint64_t expected_high = 0x05060708deadbeef;
+  EXPECT_EQ(expected_low, low);
+  EXPECT_EQ(expected_high, high);
+}

>From cdcb326b19e454b02404f9911b3583c9f6eb9cae Mon Sep 17 00:00:00 2001
From: Anthony Ha <antha at microsoft.com>
Date: Tue, 16 Apr 2024 02:00:44 +0000
Subject: [PATCH 2/2] amend! [lldb] Skip remote PutFile when MD5 hashes equal

[lldb] Skip remote PutFile when MD5 hashes equal
---
 .../gdb-server/PlatformRemoteGDBServer.cpp    |  7 +++---
 .../GDBRemoteCommunicationClient.cpp          | 25 +++++++++++--------
 .../gdb-remote/GDBRemoteCommunicationClient.h |  2 +-
 lldb/source/Target/Platform.cpp               |  3 +--
 .../GDBRemoteCommunicationClientTest.cpp      |  4 +--
 5 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
index d9f3e40019cf29..12610dc294c80d 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
@@ -686,11 +686,10 @@ Status PlatformRemoteGDBServer::RunShellCommand(
 
 bool PlatformRemoteGDBServer::CalculateMD5(const FileSpec &file_spec,
                                            uint64_t &low, uint64_t &high) {
-  if (IsConnected()) {
-    // Note that high/low are switched in the gdb remote communication client
-    return m_gdb_client_up->CalculateMD5(file_spec, high, low);
+  if (!IsConnected()) {
+    return false;
   }
-  return false;
+  return m_gdb_client_up->CalculateMD5(file_spec, low, high);
 }
 
 void PlatformRemoteGDBServer::CalculateTrapHandlerSymbolNames() {
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index 8c79d5fecf12fe..0c5cffd8c732c0 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -3419,7 +3419,7 @@ bool GDBRemoteCommunicationClient::GetFileExists(
 }
 
 bool GDBRemoteCommunicationClient::CalculateMD5(
-    const lldb_private::FileSpec &file_spec, uint64_t &high, uint64_t &low) {
+    const lldb_private::FileSpec &file_spec, uint64_t &low, uint64_t &high) {
   std::string path(file_spec.GetPath(false));
   lldb_private::StreamString stream;
   stream.PutCString("vFile:MD5:");
@@ -3445,26 +3445,29 @@ bool GDBRemoteCommunicationClient::CalculateMD5(
     // delimiter. However, we choose not to do this so existing lldb-servers
     // don't have to be patched
 
+    // The checksum is 128 bits encoded as hex
+    // This means low/high are halves of 64 bits each, in otherwords, 8 bytes.
+    // Each byte takes 2 hex characters in the response.
+    const size_t MD5_HALF_LENGTH = sizeof(uint64_t) * 2;
+
     // Get low part
-    auto part = response.GetStringRef().substr(response.GetFilePos(),
-                                               sizeof(uint64_t) * 2);
-    if (part.size() != sizeof(uint64_t) * 2)
+    auto part =
+        response.GetStringRef().substr(response.GetFilePos(), MD5_HALF_LENGTH);
+    if (part.size() != MD5_HALF_LENGTH)
       return false;
     response.SetFilePos(response.GetFilePos() + part.size());
 
-    bool conversionErrored = part.getAsInteger(16, low);
-    if (conversionErrored)
+    if (part.getAsInteger(/*radix=*/16, low))
       return false;
 
     // Get high part
-    part = response.GetStringRef().substr(response.GetFilePos(),
-                                          sizeof(uint64_t) * 2);
-    if (part.size() != sizeof(uint64_t) * 2)
+    part =
+        response.GetStringRef().substr(response.GetFilePos(), MD5_HALF_LENGTH);
+    if (part.size() != MD5_HALF_LENGTH)
       return false;
     response.SetFilePos(response.GetFilePos() + part.size());
 
-    conversionErrored = part.getAsInteger(16, high);
-    if (conversionErrored)
+    if (part.getAsInteger(/*radix=*/16, high))
       return false;
 
     return true;
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
index bd2d3e232b4645..4be7eb00f42b97 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
@@ -392,7 +392,7 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase {
           *command_output, // Pass nullptr if you don't want the command output
       const Timeout<std::micro> &timeout);
 
-  bool CalculateMD5(const FileSpec &file_spec, uint64_t &high, uint64_t &low);
+  bool CalculateMD5(const FileSpec &file_spec, uint64_t &low, uint64_t &high);
 
   lldb::DataBufferSP ReadRegister(
       lldb::tid_t tid,
diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
index cdbafb17a5df4d..887d857af3c6cb 100644
--- a/lldb/source/Target/Platform.cpp
+++ b/lldb/source/Target/Platform.cpp
@@ -1209,8 +1209,7 @@ Status Platform::PutFile(const FileSpec &source, const FileSpec &destination,
       if (!local_md5) {
         LLDB_LOGF(log, "[PutFile] couldn't get md5 sum of source");
       } else {
-        uint64_t local_md5_high, local_md5_low;
-        std::tie(local_md5_high, local_md5_low) = local_md5->words();
+        const auto [local_md5_high, local_md5_low] = local_md5->words();
         LLDB_LOGF(log, "[PutFile] destination md5: %016" PRIx64 "%016" PRIx64,
                   dest_md5_high, dest_md5_low);
         LLDB_LOGF(log, "[PutFile]       local md5: %016" PRIx64 "%016" PRIx64,
diff --git a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
index 8e0b2805e292e5..6b11ec43a65dd8 100644
--- a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
+++ b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
@@ -595,9 +595,9 @@ TEST_F(GDBRemoteCommunicationClientTest, WriteMemoryTags) {
 
 TEST_F(GDBRemoteCommunicationClientTest, CalculateMD5) {
   FileSpec file_spec("/foo/bar", FileSpec::Style::posix);
-  uint64_t high, low;
+  uint64_t low, high;
   std::future<bool> async_result = std::async(std::launch::async, [&] {
-    return client.CalculateMD5(file_spec, high, low);
+    return client.CalculateMD5(file_spec, low, high);
   });
 
   lldb_private::StreamString stream;



More information about the lldb-commits mailing list