[Lldb-commits] [lldb] r335963 - UUID: Add support for arbitrary-sized module IDs

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Fri Jun 29 04:20:29 PDT 2018


Author: labath
Date: Fri Jun 29 04:20:29 2018
New Revision: 335963

URL: http://llvm.org/viewvc/llvm-project?rev=335963&view=rev
Log:
UUID: Add support for arbitrary-sized module IDs

Summary:
The data structure is optimized for the case where the UUID size is <=
20 bytes (standard length emitted by the GNU linkers), but larger sizes
are also possible.

I've modified the string conversion function to support the new sizes as
well. For standard UUIDs it maintains the traditional formatting
(4-2-2-2-6). If a UUID is shorter, we just cut this sequence short, and
for longer UUIDs it will just repeat the last 6-byte block as long as
necessary.

I've also modified ObjectFileELF to take advantage of the new UUIDs and
avoid manually padding the UUID to 16 bytes. While there, I also made
sure the computed UUID does not depend on host endianness.

Reviewers: clayborg, lemo, sas, davide, espindola

Subscribers: emaste, arichardson, lldb-commits

Differential Revision: https://reviews.llvm.org/D48633

Modified:
    lldb/trunk/include/lldb/Utility/UUID.h
    lldb/trunk/source/Interpreter/OptionValueUUID.cpp
    lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
    lldb/trunk/source/Utility/UUID.cpp
    lldb/trunk/unittests/Utility/UUIDTest.cpp

Modified: lldb/trunk/include/lldb/Utility/UUID.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/include/lldb/Utility/UUID.h?rev=335963&r1=335962&r2=335963&view=diff
==============================================================================
--- lldb/trunk/include/lldb/Utility/UUID.h (original)
+++ lldb/trunk/include/lldb/Utility/UUID.h Fri Jun 29 04:20:29 2018
@@ -27,12 +27,6 @@ namespace lldb_private {
 
 class UUID {
 public:
-  // Most UUIDs are 16 bytes, but some Linux build-ids (SHA1) are 20.
-  typedef uint8_t ValueType[20];
-
-  //------------------------------------------------------------------
-  // Constructors and Destructors
-  //------------------------------------------------------------------
   UUID() = default;
 
   /// Creates a UUID from the data pointed to by the bytes argument. No special
@@ -64,18 +58,16 @@ public:
     return UUID(bytes);
   }
 
-  void Clear() { m_num_uuid_bytes = 0; }
+  void Clear() { m_bytes.clear(); }
 
   void Dump(Stream *s) const;
 
-  llvm::ArrayRef<uint8_t> GetBytes() const {
-    return {m_uuid, m_num_uuid_bytes};
-  }
+  llvm::ArrayRef<uint8_t> GetBytes() const { return m_bytes; }
 
   explicit operator bool() const { return IsValid(); }
-  bool IsValid() const { return m_num_uuid_bytes > 0; }
+  bool IsValid() const { return !m_bytes.empty(); }
 
-  std::string GetAsString(const char *separator = nullptr) const;
+  std::string GetAsString(llvm::StringRef separator = "-") const;
 
   size_t SetFromStringRef(llvm::StringRef str, uint32_t num_uuid_bytes = 16);
 
@@ -99,24 +91,34 @@ public:
   ///     The original string, with all decoded bytes removed.
   //------------------------------------------------------------------
   static llvm::StringRef
-  DecodeUUIDBytesFromString(llvm::StringRef str, ValueType &uuid_bytes,
-                            uint32_t &bytes_decoded,
+  DecodeUUIDBytesFromString(llvm::StringRef str,
+                            llvm::SmallVectorImpl<uint8_t> &uuid_bytes,
                             uint32_t num_uuid_bytes = 16);
 
 private:
-  UUID(llvm::ArrayRef<uint8_t> bytes);
-
-  uint32_t m_num_uuid_bytes = 0; // Should be 0, 16 or 20
-  ValueType m_uuid;
-};
+  UUID(llvm::ArrayRef<uint8_t> bytes) : m_bytes(bytes.begin(), bytes.end()) {}
 
-bool operator==(const UUID &lhs, const UUID &rhs);
-bool operator!=(const UUID &lhs, const UUID &rhs);
-bool operator<(const UUID &lhs, const UUID &rhs);
-bool operator<=(const UUID &lhs, const UUID &rhs);
-bool operator>(const UUID &lhs, const UUID &rhs);
-bool operator>=(const UUID &lhs, const UUID &rhs);
+  // GNU ld generates 20-byte build-ids. Size chosen to avoid heap allocations
+  // for this case.
+  llvm::SmallVector<uint8_t, 20> m_bytes;
 
+  friend bool operator==(const UUID &LHS, const UUID &RHS) {
+    return LHS.m_bytes == RHS.m_bytes;
+  }
+  friend bool operator!=(const UUID &LHS, const UUID &RHS) {
+    return !(LHS == RHS);
+  }
+  friend bool operator<(const UUID &LHS, const UUID &RHS) {
+    return LHS.m_bytes < RHS.m_bytes;
+  }
+  friend bool operator<=(const UUID &LHS, const UUID &RHS) {
+    return !(RHS < LHS);
+  }
+  friend bool operator>(const UUID &LHS, const UUID &RHS) { return RHS < LHS; }
+  friend bool operator>=(const UUID &LHS, const UUID &RHS) {
+    return !(LHS < RHS);
+  }
+};
 } // namespace lldb_private
 
 #endif // LLDB_UTILITY_UUID_H

Modified: lldb/trunk/source/Interpreter/OptionValueUUID.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Interpreter/OptionValueUUID.cpp?rev=335963&r1=335962&r2=335963&view=diff
==============================================================================
--- lldb/trunk/source/Interpreter/OptionValueUUID.cpp (original)
+++ lldb/trunk/source/Interpreter/OptionValueUUID.cpp Fri Jun 29 04:20:29 2018
@@ -76,21 +76,17 @@ size_t OptionValueUUID::AutoComplete(Com
   ExecutionContext exe_ctx(interpreter.GetExecutionContext());
   Target *target = exe_ctx.GetTargetPtr();
   if (target) {
-    const size_t num_modules = target->GetImages().GetSize();
-    if (num_modules > 0) {
-      UUID::ValueType uuid_bytes;
-      uint32_t num_bytes_decoded = 0;
-      UUID::DecodeUUIDBytesFromString(s, uuid_bytes, num_bytes_decoded);
+    llvm::SmallVector<uint8_t, 20> uuid_bytes;
+    if (UUID::DecodeUUIDBytesFromString(s, uuid_bytes).empty()) {
+      const size_t num_modules = target->GetImages().GetSize();
       for (size_t i = 0; i < num_modules; ++i) {
         ModuleSP module_sp(target->GetImages().GetModuleAtIndex(i));
         if (module_sp) {
           const UUID &module_uuid = module_sp->GetUUID();
           if (module_uuid.IsValid()) {
-            llvm::ArrayRef<uint8_t> decoded_bytes(uuid_bytes,
-                                                  num_bytes_decoded);
             llvm::ArrayRef<uint8_t> module_bytes = module_uuid.GetBytes();
-            if (module_bytes.size() >= num_bytes_decoded &&
-                module_bytes.take_front(num_bytes_decoded) == decoded_bytes) {
+            if (module_bytes.size() >= uuid_bytes.size() &&
+                module_bytes.take_front(uuid_bytes.size()).equals(uuid_bytes)) {
               matches.AppendString(module_uuid.GetAsString());
             }
           }

Modified: lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp?rev=335963&r1=335962&r2=335963&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (original)
+++ lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp Fri Jun 29 04:20:29 2018
@@ -730,16 +730,17 @@ size_t ObjectFileELF::GetModuleSpecifica
                     data.GetDataStart(), data.GetByteSize());
               }
             }
+            using u32le = llvm::support::ulittle32_t;
             if (gnu_debuglink_crc) {
               // Use 4 bytes of crc from the .gnu_debuglink section.
-              uint32_t uuidt[4] = {gnu_debuglink_crc, 0, 0, 0};
-              uuid = UUID::fromData(uuidt, sizeof(uuidt));
+              u32le data(gnu_debuglink_crc);
+              uuid = UUID::fromData(&data, sizeof(data));
             } else if (core_notes_crc) {
               // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make
               // it look different form .gnu_debuglink crc followed by 4 bytes
               // of note segments crc.
-              uint32_t uuidt[4] = {g_core_uuid_magic, core_notes_crc, 0, 0};
-              uuid = UUID::fromData(uuidt, sizeof(uuidt));
+              u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)};
+              uuid = UUID::fromData(data, sizeof(data));
             }
           }
 
@@ -909,6 +910,7 @@ bool ObjectFileELF::GetUUID(lldb_private
   if (!ParseSectionHeaders() && GetType() != ObjectFile::eTypeCoreFile)
     return false;
 
+  using u32le = llvm::support::ulittle32_t;
   if (m_uuid.IsValid()) {
     // We have the full build id uuid.
     *uuid = m_uuid;
@@ -925,8 +927,8 @@ bool ObjectFileELF::GetUUID(lldb_private
       // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make it look
       // different form .gnu_debuglink crc - followed by 4 bytes of note
       // segments crc.
-      uint32_t uuidt[4] = {g_core_uuid_magic, core_notes_crc, 0, 0};
-      m_uuid = UUID::fromData(uuidt, sizeof(uuidt));
+      u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)};
+      m_uuid = UUID::fromData(data, sizeof(data));
     }
   } else {
     if (!m_gnu_debuglink_crc)
@@ -934,8 +936,8 @@ bool ObjectFileELF::GetUUID(lldb_private
           calc_gnu_debuglink_crc32(m_data.GetDataStart(), m_data.GetByteSize());
     if (m_gnu_debuglink_crc) {
       // Use 4 bytes of crc from the .gnu_debuglink section.
-      uint32_t uuidt[4] = {m_gnu_debuglink_crc, 0, 0, 0};
-      m_uuid = UUID::fromData(uuidt, sizeof(uuidt));
+      u32le data(m_gnu_debuglink_crc);
+      m_uuid = UUID::fromData(&data, sizeof(data));
     }
   }
 
@@ -1273,18 +1275,16 @@ ObjectFileELF::RefineModuleDetailsFromNo
         // Only bother processing this if we don't already have the uuid set.
         if (!uuid.IsValid()) {
           // 16 bytes is UUID|MD5, 20 bytes is SHA1. Other linkers may produce a
-          // build-id of a different
-          // length. Accept it as long as it's at least 4 bytes as it will be
-          // better than our own crc32.
-          if (note.n_descsz >= 4 && note.n_descsz <= 20) {
-            uint8_t uuidbuf[20];
-            if (data.GetU8(&offset, &uuidbuf, note.n_descsz) == nullptr) {
+          // build-id of a different length. Accept it as long as it's at least
+          // 4 bytes as it will be better than our own crc32.
+          if (note.n_descsz >= 4) {
+            if (const uint8_t *buf = data.PeekData(offset, note.n_descsz)) {
+              // Save the build id as the UUID for the module.
+              uuid = UUID::fromData(buf, note.n_descsz);
+            } else {
               error.SetErrorString("failed to read GNU_BUILD_ID note payload");
               return error;
             }
-
-            // Save the build id as the UUID for the module.
-            uuid = UUID::fromData(uuidbuf, note.n_descsz);
           }
         }
         break;

Modified: lldb/trunk/source/Utility/UUID.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Utility/UUID.cpp?rev=335963&r1=335962&r2=335963&view=diff
==============================================================================
--- lldb/trunk/source/Utility/UUID.cpp (original)
+++ lldb/trunk/source/Utility/UUID.cpp Fri Jun 29 04:20:29 2018
@@ -13,6 +13,7 @@
 // Project includes
 #include "lldb/Utility/Stream.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Format.h"
 
 // C Includes
 #include <ctype.h>
@@ -21,35 +22,35 @@
 
 using namespace lldb_private;
 
-UUID::UUID(llvm::ArrayRef<uint8_t> bytes) {
-  if (bytes.size() != 20 && bytes.size() != 16)
-    bytes = {};
-
-  m_num_uuid_bytes = bytes.size();
-  std::memcpy(m_uuid, bytes.data(), bytes.size());
+// Whether to put a separator after count uuid bytes.
+// For the first 16 bytes we follow the traditional UUID format. After that, we
+// simply put a dash after every 6 bytes.
+static inline bool separate(size_t count) {
+  if (count >= 10)
+    return (count - 10) % 6 == 0;
+
+  switch (count) {
+  case 4:
+  case 6:
+  case 8:
+    return true;
+  default:
+    return false;
+  }
 }
 
-std::string UUID::GetAsString(const char *separator) const {
+std::string UUID::GetAsString(llvm::StringRef separator) const {
   std::string result;
-  char buf[256];
-  if (!separator)
-    separator = "-";
-  const uint8_t *u = GetBytes().data();
-  if (sizeof(buf) >
-      (size_t)snprintf(buf, sizeof(buf), "%2.2X%2.2X%2.2X%2.2X%s%2.2X%2.2X%s%2."
-                                         "2X%2.2X%s%2.2X%2.2X%s%2.2X%2.2X%2.2X%"
-                                         "2.2X%2.2X%2.2X",
-                       u[0], u[1], u[2], u[3], separator, u[4], u[5], separator,
-                       u[6], u[7], separator, u[8], u[9], separator, u[10],
-                       u[11], u[12], u[13], u[14], u[15])) {
-    result.append(buf);
-    if (m_num_uuid_bytes == 20) {
-      if (sizeof(buf) > (size_t)snprintf(buf, sizeof(buf),
-                                         "%s%2.2X%2.2X%2.2X%2.2X", separator,
-                                         u[16], u[17], u[18], u[19]))
-        result.append(buf);
-    }
+  llvm::raw_string_ostream os(result);
+
+  for (auto B : llvm::enumerate(GetBytes())) {
+    if (separate(B.index()))
+      os << separator;
+
+    os << llvm::format_hex_no_prefix(B.value(), 2, true);
   }
+  os.flush();
+
   return result;
 }
 
@@ -64,25 +65,24 @@ static inline int xdigit_to_int(char ch)
   return ch - '0';
 }
 
-llvm::StringRef UUID::DecodeUUIDBytesFromString(llvm::StringRef p,
-                                                ValueType &uuid_bytes,
-                                                uint32_t &bytes_decoded,
-                                                uint32_t num_uuid_bytes) {
-  ::memset(uuid_bytes, 0, sizeof(uuid_bytes));
-  size_t uuid_byte_idx = 0;
+llvm::StringRef
+UUID::DecodeUUIDBytesFromString(llvm::StringRef p,
+                                llvm::SmallVectorImpl<uint8_t> &uuid_bytes,
+                                uint32_t num_uuid_bytes) {
+  uuid_bytes.clear();
   while (!p.empty()) {
     if (isxdigit(p[0]) && isxdigit(p[1])) {
       int hi_nibble = xdigit_to_int(p[0]);
       int lo_nibble = xdigit_to_int(p[1]);
       // Translate the two hex nibble characters into a byte
-      uuid_bytes[uuid_byte_idx] = (hi_nibble << 4) + lo_nibble;
+      uuid_bytes.push_back((hi_nibble << 4) + lo_nibble);
 
       // Skip both hex digits
       p = p.drop_front(2);
 
       // Increment the byte that we are decoding within the UUID value and
       // break out if we are done
-      if (++uuid_byte_idx == num_uuid_bytes)
+      if (uuid_bytes.size() == num_uuid_bytes)
         break;
     } else if (p.front() == '-') {
       // Skip dashes
@@ -92,11 +92,6 @@ llvm::StringRef UUID::DecodeUUIDBytesFro
       break;
     }
   }
-
-  // Clear trailing bytes to 0.
-  for (uint32_t i = uuid_byte_idx; i < sizeof(ValueType); i++)
-    uuid_bytes[i] = 0;
-  bytes_decoded = uuid_byte_idx;
   return p;
 }
 
@@ -106,52 +101,17 @@ size_t UUID::SetFromStringRef(llvm::Stri
   // Skip leading whitespace characters
   p = p.ltrim();
 
-  ValueType bytes;
-  uint32_t bytes_decoded = 0;
+  llvm::SmallVector<uint8_t, 20> bytes;
   llvm::StringRef rest =
-      UUID::DecodeUUIDBytesFromString(p, bytes, bytes_decoded, num_uuid_bytes);
+      UUID::DecodeUUIDBytesFromString(p, bytes, num_uuid_bytes);
 
   // If we successfully decoded a UUID, return the amount of characters that
   // were consumed
-  if (bytes_decoded == num_uuid_bytes) {
-    *this = fromData(bytes, bytes_decoded);
+  if (bytes.size() == num_uuid_bytes) {
+    *this = fromData(bytes);
     return str.size() - rest.size();
   }
 
   // Else return zero to indicate we were not able to parse a UUID value
   return 0;
 }
-
-bool lldb_private::operator==(const lldb_private::UUID &lhs,
-                              const lldb_private::UUID &rhs) {
-  return lhs.GetBytes() == rhs.GetBytes();
-}
-
-bool lldb_private::operator!=(const lldb_private::UUID &lhs,
-                              const lldb_private::UUID &rhs) {
-  return !(lhs == rhs);
-}
-
-bool lldb_private::operator<(const lldb_private::UUID &lhs,
-                             const lldb_private::UUID &rhs) {
-  if (lhs.GetBytes().size() != rhs.GetBytes().size())
-    return lhs.GetBytes().size() < rhs.GetBytes().size();
-
-  return std::memcmp(lhs.GetBytes().data(), rhs.GetBytes().data(),
-                     lhs.GetBytes().size());
-}
-
-bool lldb_private::operator<=(const lldb_private::UUID &lhs,
-                              const lldb_private::UUID &rhs) {
-  return !(lhs > rhs);
-}
-
-bool lldb_private::operator>(const lldb_private::UUID &lhs,
-                             const lldb_private::UUID &rhs) {
-  return rhs < lhs;
-}
-
-bool lldb_private::operator>=(const lldb_private::UUID &lhs,
-                              const lldb_private::UUID &rhs) {
-  return !(lhs < rhs);
-}

Modified: lldb/trunk/unittests/Utility/UUIDTest.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/unittests/Utility/UUIDTest.cpp?rev=335963&r1=335962&r2=335963&view=diff
==============================================================================
--- lldb/trunk/unittests/Utility/UUIDTest.cpp (original)
+++ lldb/trunk/unittests/Utility/UUIDTest.cpp Fri Jun 29 04:20:29 2018
@@ -71,3 +71,14 @@ TEST(UUIDTest, SetFromStringRef) {
       32u, u.SetFromStringRef("404142434445464748494a4b4c4d4e4f-50515253", 16));
   EXPECT_EQ(UUID::fromData("@ABCDEFGHIJKLMNO", 16), u);
 }
+
+TEST(UUIDTest, StringConverion) {
+  EXPECT_EQ("40414243", UUID::fromData("@ABC", 4).GetAsString());
+  EXPECT_EQ("40414243-4445-4647", UUID::fromData("@ABCDEFG", 8).GetAsString());
+  EXPECT_EQ("40414243-4445-4647-4849-4A4B",
+            UUID::fromData("@ABCDEFGHIJK", 12).GetAsString());
+  EXPECT_EQ("40414243-4445-4647-4849-4A4B4C4D4E4F",
+            UUID::fromData("@ABCDEFGHIJKLMNO", 16).GetAsString());
+  EXPECT_EQ("40414243-4445-4647-4849-4A4B4C4D4E4F-50515253",
+            UUID::fromData("@ABCDEFGHIJKLMNOPQRS", 20).GetAsString());
+}




More information about the lldb-commits mailing list