[compiler-rt] [Sanitizers][Darwin] Correct iterating of MachO load commands (PR #130161)

Mariusz Borsa via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 7 15:17:32 PST 2025


https://github.com/wrotki updated https://github.com/llvm/llvm-project/pull/130161

>From 0931113a3ac91b659c700f30b3cb97f498314900 Mon Sep 17 00:00:00 2001
From: Mariusz Borsa <m_borsa at apple.com>
Date: Thu, 6 Mar 2025 13:25:44 -0800
Subject: [PATCH 1/3] [Sanitizers][Darwin] Correct iterating of MachO load
 commands

The condition to stop iterating so far was to look for load command cmd field == 0.  The iteration would continue past the commands area, and would finally find lc->cmd ==0, if lucky. Or crash with bus error, if out of luck.

Correcting this by limiting the number of iterations to the count specified in mach_header(_64) ncmds field.

rdar://143903403
---
 .../sanitizer_procmaps_mac.cpp                | 37 +++++++++++++++----
 .../tests/sanitizer_procmaps_mac_test.cpp     | 25 ++++++++++---
 2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
index 5ff8d1832556f..3b67382db2add 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
@@ -334,9 +334,22 @@ static const load_command *NextCommand(const load_command *lc) {
   return (const load_command *)((const char *)lc + lc->cmdsize);
 }
 
-static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
-  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
-    if (lc->cmd != LC_UUID) continue;
+#ifdef MH_MAGIC_64
+static const size_t header_size = sizeof(mach_header_64);
+#else
+static const size_t header_size = sizeof(mach_header);
+#endif
+
+static void FindUUID(const load_command *first_lc, const mach_header *hdr,
+                     u8 *uuid_output) {
+  unsigned short curcmd = 0;
+  for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
+       curcmd++, lc = NextCommand(lc)) {
+
+    CHECK_LT((const char*)lc, (const char*)hdr + header_size + hdr->sizeofcmds);
+
+    if (lc->cmd != LC_UUID)
+      continue;
 
     const uuid_command *uuid_lc = (const uuid_command *)lc;
     const uint8_t *uuid = &uuid_lc->uuid[0];
@@ -345,9 +358,16 @@ static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
   }
 }
 
-static bool IsModuleInstrumented(const load_command *first_lc) {
-  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
-    if (lc->cmd != LC_LOAD_DYLIB) continue;
+static bool IsModuleInstrumented(const load_command *first_lc,
+                                 const mach_header *hdr) {
+  unsigned short curcmd = 0;
+  for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
+       curcmd++, lc = NextCommand(lc)) {
+
+    CHECK_LT((const char*)lc, (const char*)hdr + header_size + hdr->sizeofcmds);
+
+    if (lc->cmd != LC_LOAD_DYLIB)
+      continue;
 
     const dylib_command *dylib_lc = (const dylib_command *)lc;
     uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
@@ -394,9 +414,10 @@ bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
         }
       }
       FindUUID((const load_command *)data_.current_load_cmd_addr,
-               data_.current_uuid);
+               hdr, data_.current_uuid);
       data_.current_instrumented = IsModuleInstrumented(
-          (const load_command *)data_.current_load_cmd_addr);
+          (const load_command *)data_.current_load_cmd_addr,
+          hdr);
     }
 
     while (data_.current_load_cmd_count > 0) {
diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_mac_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_mac_test.cpp
index f622bba246309..1720a016d8a41 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_mac_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_mac_test.cpp
@@ -37,10 +37,12 @@ class MemoryMappingLayoutMock final : public MemoryMappingLayout {
     .uuid = {}
   };
 
-  static constexpr char dylib_name[] = "libclang_rt.\0\0\0"; // 8 bytes aligned, padded with zeros per loader.h
+  static constexpr char libclang_rt_dylib_name[]    = "libclang_rt.\0\0\0"; // 8 bytes aligned, padded with zeros per loader.h
+  static constexpr char uninstrumented_dylib_name[] = "uninst___rt.\0\0\0"; // 8 bytes aligned, padded with zeros per loader.h
+
   static constexpr dylib_command mock_dylib_command = {
     .cmd = LC_LOAD_DYLIB,
-    .cmdsize = sizeof(dylib_command) + sizeof(dylib_name),
+    .cmdsize = sizeof(dylib_command) + sizeof(libclang_rt_dylib_name),
     .dylib = {
       .name = {
         .offset = sizeof(dylib_command)
@@ -59,7 +61,7 @@ class MemoryMappingLayoutMock final : public MemoryMappingLayout {
   std::vector<unsigned char> mock_header;
 
 public:
-  MemoryMappingLayoutMock(): MemoryMappingLayout(false) {
+  MemoryMappingLayoutMock(bool instrumented): MemoryMappingLayout(false) {
     EXPECT_EQ(mock_uuid_command.cmdsize % 8, 0u);
     EXPECT_EQ(mock_dylib_command.cmdsize % 8, 0u);
 
@@ -89,6 +91,7 @@ class MemoryMappingLayoutMock final : public MemoryMappingLayout {
     copy((unsigned char *)&mock_dylib_command,
       ((unsigned char *)&mock_dylib_command) + sizeof(dylib_command), // as mock_dylib_command.cmdsize contains the following string
       back_inserter(mock_header));
+    const char (&dylib_name)[16] = instrumented ? libclang_rt_dylib_name : uninstrumented_dylib_name;
     copy((unsigned char *)dylib_name,
       ((unsigned char *)dylib_name) + sizeof(dylib_name),
       back_inserter(mock_header));
@@ -120,8 +123,20 @@ class MemoryMappingLayoutMock final : public MemoryMappingLayout {
   }
 };
 
-TEST(MemoryMappingLayout, Next) {
-  __sanitizer::MemoryMappingLayoutMock memory_mapping;
+TEST(MemoryMappingLayout, NextInstrumented) {
+  __sanitizer::MemoryMappingLayoutMock memory_mapping(true);
+  __sanitizer::MemoryMappedSegment segment;
+  size_t size = memory_mapping.SizeOfLoadCommands();
+  while (memory_mapping.Next(&segment)) {
+    size_t offset = memory_mapping.CurrentLoadCommandOffset();
+    EXPECT_LE(offset, size);
+  }
+  size_t final_offset = memory_mapping.CurrentLoadCommandOffset();
+  EXPECT_EQ(final_offset, size); // All commands processed, no more, no less
+}
+
+TEST(MemoryMappingLayout, NextUnInstrumented) {
+  __sanitizer::MemoryMappingLayoutMock memory_mapping(false);
   __sanitizer::MemoryMappedSegment segment;
   size_t size = memory_mapping.SizeOfLoadCommands();
   while (memory_mapping.Next(&segment)) {

>From b81df5efe91818e5ee149c6fb334d13a88b1ee9e Mon Sep 17 00:00:00 2001
From: Mariusz Borsa <m_borsa at apple.com>
Date: Thu, 6 Mar 2025 13:32:48 -0800
Subject: [PATCH 2/3] [Sanitizers][Darwin][NFC] clang-format
 sanitizer_procmaps_mac files

Apparently it wasn't done before. Attaching it to the fix for:

rdar://143903403
---
 .../sanitizer_procmaps_mac.cpp                |  21 ++--
 .../tests/sanitizer_procmaps_mac_test.cpp     | 116 +++++++++---------
 2 files changed, 69 insertions(+), 68 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
index 3b67382db2add..a2b3a61e6e1e1 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
@@ -334,19 +334,19 @@ static const load_command *NextCommand(const load_command *lc) {
   return (const load_command *)((const char *)lc + lc->cmdsize);
 }
 
-#ifdef MH_MAGIC_64
+#  ifdef MH_MAGIC_64
 static const size_t header_size = sizeof(mach_header_64);
-#else
+#  else
 static const size_t header_size = sizeof(mach_header);
-#endif
+#  endif
 
 static void FindUUID(const load_command *first_lc, const mach_header *hdr,
                      u8 *uuid_output) {
   unsigned short curcmd = 0;
   for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
        curcmd++, lc = NextCommand(lc)) {
-
-    CHECK_LT((const char*)lc, (const char*)hdr + header_size + hdr->sizeofcmds);
+    CHECK_LT((const char *)lc,
+             (const char *)hdr + header_size + hdr->sizeofcmds);
 
     if (lc->cmd != LC_UUID)
       continue;
@@ -363,8 +363,8 @@ static bool IsModuleInstrumented(const load_command *first_lc,
   unsigned short curcmd = 0;
   for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
        curcmd++, lc = NextCommand(lc)) {
-
-    CHECK_LT((const char*)lc, (const char*)hdr + header_size + hdr->sizeofcmds);
+    CHECK_LT((const char *)lc,
+             (const char *)hdr + header_size + hdr->sizeofcmds);
 
     if (lc->cmd != LC_LOAD_DYLIB)
       continue;
@@ -413,11 +413,10 @@ bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
           continue;
         }
       }
-      FindUUID((const load_command *)data_.current_load_cmd_addr,
-               hdr, data_.current_uuid);
+      FindUUID((const load_command *)data_.current_load_cmd_addr, hdr,
+               data_.current_uuid);
       data_.current_instrumented = IsModuleInstrumented(
-          (const load_command *)data_.current_load_cmd_addr,
-          hdr);
+          (const load_command *)data_.current_load_cmd_addr, hdr);
     }
 
     while (data_.current_load_cmd_count > 0) {
diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_mac_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_mac_test.cpp
index 1720a016d8a41..7547528e2adb8 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_mac_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_procmaps_mac_test.cpp
@@ -37,18 +37,15 @@ class MemoryMappingLayoutMock final : public MemoryMappingLayout {
     .uuid = {}
   };
 
-  static constexpr char libclang_rt_dylib_name[]    = "libclang_rt.\0\0\0"; // 8 bytes aligned, padded with zeros per loader.h
-  static constexpr char uninstrumented_dylib_name[] = "uninst___rt.\0\0\0"; // 8 bytes aligned, padded with zeros per loader.h
+  static constexpr char libclang_rt_dylib_name[] =
+      "libclang_rt.\0\0\0";  // 8 bytes aligned, padded with zeros per loader.h
+  static constexpr char uninstrumented_dylib_name[] =
+      "uninst___rt.\0\0\0";  // 8 bytes aligned, padded with zeros per loader.h
 
   static constexpr dylib_command mock_dylib_command = {
-    .cmd = LC_LOAD_DYLIB,
-    .cmdsize = sizeof(dylib_command) + sizeof(libclang_rt_dylib_name),
-    .dylib = {
-      .name = {
-        .offset = sizeof(dylib_command)
-      }
-    }
-  };
+      .cmd = LC_LOAD_DYLIB,
+      .cmdsize = sizeof(dylib_command) + sizeof(libclang_rt_dylib_name),
+      .dylib = {.name = {.offset = sizeof(dylib_command)}}};
 
   static constexpr uuid_command mock_trap_command = {
     .cmd = LC_UUID,
@@ -61,52 +58,57 @@ class MemoryMappingLayoutMock final : public MemoryMappingLayout {
   std::vector<unsigned char> mock_header;
 
 public:
-  MemoryMappingLayoutMock(bool instrumented): MemoryMappingLayout(false) {
-    EXPECT_EQ(mock_uuid_command.cmdsize % 8, 0u);
-    EXPECT_EQ(mock_dylib_command.cmdsize % 8, 0u);
-
-    Reset();
-
-#ifdef MH_MAGIC_64
-    const struct mach_header_64 *header = (mach_header_64 *)_dyld_get_image_header(0); // Any header will do
-    const size_t header_size = sizeof(mach_header_64);
-#else
-    const struct mach_header *header = _dyld_get_image_header(0);
-    const size_t header_size = sizeof(mach_header);
-#endif
-    const size_t mock_header_size_with_extras = header_size + header->sizeofcmds +
-      mock_uuid_command.cmdsize + mock_dylib_command.cmdsize + sizeof(uuid_command);
-
-    mock_header.reserve(mock_header_size_with_extras);
-    // Copy the original header
-    copy((unsigned char *)header,
-      (unsigned char *)header + header_size + header->sizeofcmds,
-      back_inserter(mock_header));
-    // The following commands are not supposed to be processed
-    // by the (correct) ::Next method at all, since they're not
-    // accounted for in header->ncmds .
-    copy((unsigned char *)&mock_uuid_command,
-      ((unsigned char *)&mock_uuid_command) + mock_uuid_command.cmdsize,
-      back_inserter(mock_header));
-    copy((unsigned char *)&mock_dylib_command,
-      ((unsigned char *)&mock_dylib_command) + sizeof(dylib_command), // as mock_dylib_command.cmdsize contains the following string
-      back_inserter(mock_header));
-    const char (&dylib_name)[16] = instrumented ? libclang_rt_dylib_name : uninstrumented_dylib_name;
-    copy((unsigned char *)dylib_name,
-      ((unsigned char *)dylib_name) + sizeof(dylib_name),
-      back_inserter(mock_header));
-
-    // Append a command w. huge size to have the test detect the read overrun
-    copy((unsigned char *)&mock_trap_command,
-      ((unsigned char *)&mock_trap_command) + sizeof(uuid_command),
-      back_inserter(mock_header));
-
-    start_load_cmd_addr = (const char *)(mock_header.data() + header_size);
-    sizeofcmds = header->sizeofcmds;
-
-    const char *last_byte_load_cmd_addr = (start_load_cmd_addr+sizeofcmds-1);
-    data_.current_image = -1; // So the loop in ::Next runs just once
-  }
+ MemoryMappingLayoutMock(bool instrumented) : MemoryMappingLayout(false) {
+   EXPECT_EQ(mock_uuid_command.cmdsize % 8, 0u);
+   EXPECT_EQ(mock_dylib_command.cmdsize % 8, 0u);
+
+   Reset();
+
+#  ifdef MH_MAGIC_64
+   const struct mach_header_64 *header =
+       (mach_header_64 *)_dyld_get_image_header(0);  // Any header will do
+   const size_t header_size = sizeof(mach_header_64);
+#  else
+   const struct mach_header *header = _dyld_get_image_header(0);
+   const size_t header_size = sizeof(mach_header);
+#  endif
+   const size_t mock_header_size_with_extras =
+       header_size + header->sizeofcmds + mock_uuid_command.cmdsize +
+       mock_dylib_command.cmdsize + sizeof(uuid_command);
+
+   mock_header.reserve(mock_header_size_with_extras);
+   // Copy the original header
+   copy((unsigned char *)header,
+        (unsigned char *)header + header_size + header->sizeofcmds,
+        back_inserter(mock_header));
+   // The following commands are not supposed to be processed
+   // by the (correct) ::Next method at all, since they're not
+   // accounted for in header->ncmds .
+   copy((unsigned char *)&mock_uuid_command,
+        ((unsigned char *)&mock_uuid_command) + mock_uuid_command.cmdsize,
+        back_inserter(mock_header));
+   copy((unsigned char *)&mock_dylib_command,
+        ((unsigned char *)&mock_dylib_command) +
+            sizeof(dylib_command),  // as mock_dylib_command.cmdsize contains
+                                    // the following string
+        back_inserter(mock_header));
+   const char(&dylib_name)[16] =
+       instrumented ? libclang_rt_dylib_name : uninstrumented_dylib_name;
+   copy((unsigned char *)dylib_name,
+        ((unsigned char *)dylib_name) + sizeof(dylib_name),
+        back_inserter(mock_header));
+
+   // Append a command w. huge size to have the test detect the read overrun
+   copy((unsigned char *)&mock_trap_command,
+        ((unsigned char *)&mock_trap_command) + sizeof(uuid_command),
+        back_inserter(mock_header));
+
+   start_load_cmd_addr = (const char *)(mock_header.data() + header_size);
+   sizeofcmds = header->sizeofcmds;
+
+   const char *last_byte_load_cmd_addr = (start_load_cmd_addr + sizeofcmds - 1);
+   data_.current_image = -1;  // So the loop in ::Next runs just once
+ }
 
   size_t SizeOfLoadCommands() {
     return sizeofcmds;
@@ -132,7 +134,7 @@ TEST(MemoryMappingLayout, NextInstrumented) {
     EXPECT_LE(offset, size);
   }
   size_t final_offset = memory_mapping.CurrentLoadCommandOffset();
-  EXPECT_EQ(final_offset, size); // All commands processed, no more, no less
+  EXPECT_EQ(final_offset, size);  // All commands processed, no more, no less
 }
 
 TEST(MemoryMappingLayout, NextUnInstrumented) {

>From c4e232f574536de0cc3db4894f404a9aa3f10e1a Mon Sep 17 00:00:00 2001
From: Mariusz Borsa <m_borsa at apple.com>
Date: Fri, 7 Mar 2025 14:07:18 -0800
Subject: [PATCH 3/3] [Sanitizers][Darwin][NFC] Apply PR feedback

Fix 2 nits

rdar://143903403
---
 compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
index a2b3a61e6e1e1..4f909708d3a75 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
@@ -335,14 +335,14 @@ static const load_command *NextCommand(const load_command *lc) {
 }
 
 #  ifdef MH_MAGIC_64
-static const size_t header_size = sizeof(mach_header_64);
+static constexpr size_t header_size = sizeof(mach_header_64);
 #  else
-static const size_t header_size = sizeof(mach_header);
+static constexpr size_t header_size = sizeof(mach_header);
 #  endif
 
 static void FindUUID(const load_command *first_lc, const mach_header *hdr,
                      u8 *uuid_output) {
-  unsigned short curcmd = 0;
+  uint32_t curcmd = 0;
   for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
        curcmd++, lc = NextCommand(lc)) {
     CHECK_LT((const char *)lc,



More information about the llvm-commits mailing list