[Lldb-commits] [lldb] r182065 - Implement ObjectFileELF::GetModuleSpecifications(), and add PlatformLinux code to deal with unknown arch properties.

Mike Sartain mikesart at valvesoftware.com
Tue May 21 15:32:27 PDT 2013


> Anything that can be read from the binary to help figure out the vendor and OS would be great, but it would need to be bulletproof.

I spent the last couple of days investigating ELF headers and note sections, and bulletproof isn't the word I'd use to describe any of it. Both the "file" command on Linux and readelf -n just parse and dump the ELF note sections. I tried removing those sections and hex-editing them into all kinds of crazy values, and the binaries continued to run just fine.

The result is the patch below. A few notes:
 - ObjectFileELF::GetModuleSpecifications on Linux should work now.
 - Which means "platform process list" should work as well.
 - We are now parsing the ELF build-id if it exists, which should allow us to load symbols that were stripped from the binary (looking at that next).

Thanks.
 -Mike

https://gist.github.com/mikesartain/5623734

Index: include/lldb/Core/UUID.h
===================================================================
--- include/lldb/Core/UUID.h	(revision 182424)
+++ include/lldb/Core/UUID.h	(working copy)
@@ -22,7 +22,8 @@
 class UUID
 {
 public:
-    typedef uint8_t ValueType[16];
+    // Most UUIDs are 16 bytes, but some Linux build-ids (SHA1) are 20.
+    typedef uint8_t ValueType[20];
 
     //------------------------------------------------------------------
     // Constructors and Destructors
@@ -45,20 +46,20 @@
     const void *
     GetBytes() const;
 
-    static size_t
+    size_t
     GetByteSize();
 
     bool
     IsValid () const;
 
     void
-    SetBytes (const void *uuid_bytes);
+    SetBytes (const void *uuid_bytes, uint32_t num_uuid_bytes = 16);
 
     std::string
     GetAsString () const;
 
     size_t
-    SetFromCString (const char *c_str);
+    SetFromCString (const char *c_str, uint32_t num_uuid_bytes = 16);
 
     // Decode as many UUID bytes (up to 16) as possible from the C string "cstr"
     // This is used for auto completion where a partial UUID might have been
@@ -86,12 +87,13 @@
     ///     which should be 16 if a full UUID value was properly decoded.
     //------------------------------------------------------------------
     static size_t
-    DecodeUUIDBytesFromCString (const char *cstr, ValueType &uuid_bytes, const char **end);
+    DecodeUUIDBytesFromCString (const char *cstr, ValueType &uuid_bytes, const char **end, uint32_t num_uuid_bytes = 16);
     
 protected:
     //------------------------------------------------------------------
     // Classes that inherit from UUID can see and modify these
     //------------------------------------------------------------------
+    uint32_t m_num_uuid_bytes; // Should be 16 or 20
     ValueType m_uuid;
 };
 
Index: source/Core/UUID.cpp
===================================================================
--- source/Core/UUID.cpp	(revision 182424)
+++ source/Core/UUID.cpp	(working copy)
@@ -22,29 +22,30 @@
 
 namespace lldb_private {
 
-UUID::UUID()
+UUID::UUID() : m_num_uuid_bytes(16)
 {
     ::memset (m_uuid, 0, sizeof(m_uuid));
 }
 
 UUID::UUID(const UUID& rhs)
 {
+    m_num_uuid_bytes = rhs.m_num_uuid_bytes;
     ::memcpy (m_uuid, rhs.m_uuid, sizeof (m_uuid));
 }
 
 UUID::UUID (const void *uuid_bytes, uint32_t num_uuid_bytes)
 {
-    if (uuid_bytes && num_uuid_bytes >= 16)
-        ::memcpy (m_uuid, uuid_bytes, sizeof (m_uuid));
-    else
-        ::memset (m_uuid, 0, sizeof(m_uuid));
+	SetBytes (uuid_bytes, num_uuid_bytes);
 }
 
 const UUID&
 UUID::operator=(const UUID& rhs)
 {
     if (this != &rhs)
+	{
+		m_num_uuid_bytes = rhs.m_num_uuid_bytes;
         ::memcpy (m_uuid, rhs.m_uuid, sizeof (m_uuid));
+	}
     return *this;
 }
 
@@ -55,6 +56,7 @@
 void
 UUID::Clear()
 {
+	m_num_uuid_bytes = 16;
     ::memset (m_uuid, 0, sizeof(m_uuid));
 }
 
@@ -70,12 +72,17 @@
     std::string result;
     char buf[64];
     const uint8_t *u = (const uint8_t *)GetBytes();
-    if (sizeof (buf) > snprintf (buf,
+    if (sizeof (buf) > (size_t)snprintf (buf,
                             sizeof (buf),
                             "%2.2X%2.2X%2.2X%2.2X-%2.2X%2.2X-%2.2X%2.2X-%2.2X%2.2X-%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X",
                             u[0],u[1],u[2],u[3],u[4],u[5],u[6],u[7],u[8],u[9],u[10],u[11],u[12],u[13],u[14],u[15]))
     {
         result.append (buf);
+        if (m_num_uuid_bytes == 20)
+        {
+            if (sizeof (buf) > (size_t)snprintf (buf, sizeof (buf), "-%2.2X%2.2X%2.2X%2.2X", u[16],u[17],u[18],u[19]))
+                result.append (buf);
+        }
     }
     return result;
 }
@@ -86,21 +93,37 @@
     const uint8_t *u = (const uint8_t *)GetBytes();
     s->Printf ("%2.2X%2.2X%2.2X%2.2X-%2.2X%2.2X-%2.2X%2.2X-%2.2X%2.2X-%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X",
               u[0],u[1],u[2],u[3],u[4],u[5],u[6],u[7],u[8],u[9],u[10],u[11],u[12],u[13],u[14],u[15]);
+    if (m_num_uuid_bytes == 20)
+    {
+        s->Printf ("-%2.2X%2.2X%2.2X%2.2X", u[16],u[17],u[18],u[19]);
+    }
 }
 
 void
-UUID::SetBytes (const void *uuid_bytes)
+UUID::SetBytes (const void *uuid_bytes, uint32_t num_uuid_bytes)
 {
-    if (uuid_bytes)
-        ::memcpy (m_uuid, uuid_bytes, sizeof (m_uuid));
+    if (uuid_bytes && num_uuid_bytes >= 20)
+    {
+        m_num_uuid_bytes = 20;
+        ::memcpy (m_uuid, uuid_bytes, m_num_uuid_bytes);
+    }
+    else if (uuid_bytes && num_uuid_bytes >= 16)
+    {
+        m_num_uuid_bytes = 16;
+        ::memcpy (m_uuid, uuid_bytes, m_num_uuid_bytes);
+        m_uuid[16] = m_uuid[17] = m_uuid[18] = m_uuid[19] = 0;
+    }
     else
+    {
+		m_num_uuid_bytes = 16;
         ::memset (m_uuid, 0, sizeof(m_uuid));
+    }
 }
 
 size_t
 UUID::GetByteSize()
 {
-    return sizeof(UUID::ValueType);
+    return m_num_uuid_bytes;
 }
 
 bool
@@ -121,7 +144,11 @@
             m_uuid[12] ||
             m_uuid[13] ||
             m_uuid[14] ||
-            m_uuid[15];
+            m_uuid[15] ||
+            m_uuid[16] ||
+            m_uuid[17] ||
+            m_uuid[18] ||
+            m_uuid[19];
 }
 
 static inline int
@@ -134,7 +161,7 @@
 }
 
 size_t
-UUID::DecodeUUIDBytesFromCString (const char *p, ValueType &uuid_bytes, const char **end)
+UUID::DecodeUUIDBytesFromCString (const char *p, ValueType &uuid_bytes, const char **end, uint32_t num_uuid_bytes)
 {
     size_t uuid_byte_idx = 0;
     if (p)
@@ -153,7 +180,7 @@
                 
                 // Increment the byte that we are decoding within the UUID value
                 // and break out if we are done
-                if (++uuid_byte_idx == 16)
+                if (++uuid_byte_idx == num_uuid_bytes)
                     break;
             }
             else if (*p == '-')
@@ -170,10 +197,13 @@
     }
     if (end)
         *end = p;
+    // Clear trailing bytes to 0.
+    for (uint32_t i = uuid_byte_idx; i < sizeof(ValueType); i++)
+        uuid_bytes[i] = 0;
     return uuid_byte_idx;
 }
 size_t
-UUID::SetFromCString (const char *cstr)
+UUID::SetFromCString (const char *cstr, uint32_t num_uuid_bytes)
 {
     if (cstr == NULL)
         return 0;
@@ -184,11 +214,11 @@
     while (isspace(*p))
         ++p;
     
-    const size_t uuid_byte_idx = UUID::DecodeUUIDBytesFromCString (p, m_uuid, &p);
+    const size_t uuid_byte_idx = UUID::DecodeUUIDBytesFromCString (p, m_uuid, &p, num_uuid_bytes);
 
     // If we successfully decoded a UUID, return the amount of characters that
     // were consumed
-    if (uuid_byte_idx == 16)
+    if (uuid_byte_idx == num_uuid_bytes)
         return p - cstr;
 
     // Else return zero to indicate we were not able to parse a UUID value
@@ -200,35 +230,35 @@
 bool
 lldb_private::operator == (const lldb_private::UUID &lhs, const lldb_private::UUID &rhs)
 {
-    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), lldb_private::UUID::GetByteSize()) == 0;
+    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), sizeof (lldb_private::UUID::ValueType)) == 0;
 }
 
 bool
 lldb_private::operator != (const lldb_private::UUID &lhs, const lldb_private::UUID &rhs)
 {
-    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), lldb_private::UUID::GetByteSize()) != 0;
+    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), sizeof (lldb_private::UUID::ValueType)) != 0;
 }
 
 bool
 lldb_private::operator <  (const lldb_private::UUID &lhs, const lldb_private::UUID &rhs)
 {
-    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), lldb_private::UUID::GetByteSize()) <  0;
+    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), sizeof (lldb_private::UUID::ValueType)) <  0;
 }
 
 bool
 lldb_private::operator <= (const lldb_private::UUID &lhs, const lldb_private::UUID &rhs)
 {
-    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), lldb_private::UUID::GetByteSize()) <= 0;
+    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), sizeof (lldb_private::UUID::ValueType)) <= 0;
 }
 
 bool
 lldb_private::operator >  (const lldb_private::UUID &lhs, const lldb_private::UUID &rhs)
 {
-    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), lldb_private::UUID::GetByteSize()) >  0;
+    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), sizeof (lldb_private::UUID::ValueType)) >  0;
 }
 
 bool
 lldb_private::operator >= (const lldb_private::UUID &lhs, const lldb_private::UUID &rhs)
 {
-    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), lldb_private::UUID::GetByteSize()) >= 0;
+    return ::memcmp (lhs.GetBytes(), rhs.GetBytes(), sizeof (lldb_private::UUID::ValueType)) >= 0;
 }
Index: source/Host/linux/Host.cpp
===================================================================
--- source/Host/linux/Host.cpp	(revision 182424)
+++ source/Host/linux/Host.cpp	(working copy)
@@ -25,6 +25,9 @@
 #include "lldb/Core/DataBufferHeap.h"
 #include "lldb/Core/DataExtractor.h"
 
+#include "lldb/Core/ModuleSpec.h"
+#include "lldb/Symbol/ObjectFile.h"
+
 using namespace lldb;
 using namespace lldb_private;
 
@@ -293,6 +296,30 @@
 }
 
 static bool
+GetELFProcessCPUType (const char *exe_path, ProcessInstanceInfo &process_info)
+{
+    // Clear the architecture.
+    process_info.GetArchitecture().Clear();
+
+    ModuleSpecList specs;
+    FileSpec filespec (exe_path, false);
+    const size_t num_specs = ObjectFile::GetModuleSpecifications (filespec, 0, specs);
+	// GetModuleSpecifications() could fail if the executable has been deleted or is locked.
+	// But it shouldn't return more than 1 architecture.
+	assert(num_specs <= 1 && "Linux plugin supports only a single architecture");
+    if (num_specs == 1)
+    {
+        ModuleSpec module_spec;
+        if (specs.GetModuleSpecAtIndex (0, module_spec) && module_spec.GetArchitecture().IsValid())
+        {
+            process_info.GetArchitecture () = module_spec.GetArchitecture();
+            return true;
+        }
+    }
+    return false;
+}
+
+static bool
 GetProcessAndStatInfo (lldb::pid_t pid, ProcessInstanceInfo &process_info, ProcessStatInfo &stat_info, lldb::pid_t &tracerpid)
 {
     tracerpid = 0;
@@ -300,9 +327,6 @@
     ::memset (&stat_info, 0, sizeof(stat_info));
     stat_info.ppid = LLDB_INVALID_PROCESS_ID;
 
-    // Architecture is intentionally omitted because that's better resolved
-    // in other places (see ProcessPOSIX::DoAttachWithID().
-
     // Use special code here because proc/[pid]/exe is a symbolic link.
     char link_path[PATH_MAX];
     char exe_path[PATH_MAX] = "";
@@ -324,6 +348,10 @@
     {
         exe_path[len - deleted_len] = 0;
     }
+    else
+    {
+        GetELFProcessCPUType (exe_path, process_info);
+    }
 
     process_info.SetProcessID(pid);
     process_info.GetExecutableFile().SetFile(exe_path, false);
Index: source/Plugins/ObjectFile/ELF/ELFHeader.h
===================================================================
--- source/Plugins/ObjectFile/ELF/ELFHeader.h	(revision 182424)
+++ source/Plugins/ObjectFile/ELF/ELFHeader.h	(working copy)
@@ -107,7 +107,7 @@
     GetRelocationJumpSlotType() const;
 
     //--------------------------------------------------------------------------
-    /// Parse an ELFSectionHeader entry starting at position \p offset and
+    /// Parse an ELFHeader entry starting at position \p offset and
     /// update the data extractor with the address size and byte order
     /// attributes as defined by the header.
     ///
@@ -120,7 +120,7 @@
     ///    advanced by the number of bytes read.
     ///
     /// @return
-    ///    True if the ELFSectionHeader was successfully read and false
+    ///    True if the ELFHeader was successfully read and false
     ///    otherwise.
     bool
     Parse(lldb_private::DataExtractor &data, lldb::offset_t *offset);
Index: source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
===================================================================
--- source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp	(revision 182424)
+++ source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp	(working copy)
@@ -243,11 +243,8 @@
                                         lldb::offset_t length,
                                         lldb_private::ModuleSpecList &specs)
 {
-// FIXME: mikesart at valvesoftware.com
-// Implementing this function has broken several tests. Specifically this one:
-// Python dotest.py --executable <path-to-lldb> -p TestCallStdStringFunction.py
     const size_t initial_count = specs.GetSize();
-#if 0    
+
     if (ObjectFileELF::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize()))
     {
         DataExtractor data;
@@ -264,15 +261,20 @@
                                                        LLDB_INVALID_CPUTYPE);
                 if (spec.GetArchitecture().IsValid())
                 {
-                    // ObjectFileMachO adds the UUID here also, but that isn't in the elf header
-                    // so we'd have to read the entire file in and calculate the md5sum.
-                    // That'd be bad for this routine...
+                    // We could parse the ABI tag information (in .note, .notes, or .note.ABI-tag) to get the
+                    // machine information. However, we'd have to read a good bit of the rest of the file,
+                    // and this info isn't guaranteed to exist or be correct. More details here:
+                    //  http://refspecs.linuxfoundation.org/LSB_1.2.0/gLSB/noteabitag.html
+					// Instead of passing potentially incorrect information down the pipeline, grab
+					// the host information and use it.
+                    spec.GetArchitecture().GetTriple().setOSName (Host::GetOSString().GetCString());
+                    spec.GetArchitecture().GetTriple().setVendorName(Host::GetVendorString().GetCString());
                     specs.Append(spec);
                 }
             }
         }
     }
-#endif
+
     return specs.GetSize() - initial_count;
 }
 
@@ -360,7 +362,12 @@
 bool
 ObjectFileELF::GetUUID(lldb_private::UUID* uuid)
 {
-    // FIXME: Return MD5 sum here.  See comment in ObjectFile.h.
+    if (m_uuid.IsValid())
+    {
+        *uuid = m_uuid;
+        return true;
+    }
+    // FIXME: Return MD5 sum here. See comment in ObjectFile.h.
     return false;
 }
 
@@ -661,6 +668,51 @@
     return NULL;
 }
 
+static bool
+ParseNoteGNUBuildID(DataExtractor& data, lldb_private::UUID& uuid)
+{
+	// Try to parse the note section (ie .note.gnu.build-id|.notes|.note|...) and get the build id.
+    // BuildID documentation: https://fedoraproject.org/wiki/Releases/FeatureBuildId
+    struct
+    {
+        uint32_t name_len;  // Length of note name
+        uint32_t desc_len;  // Length of note descriptor
+        uint32_t type;      // Type of note (1 is ABI_TAG, 3 is BUILD_ID)
+    } notehdr;
+    lldb::offset_t offset = 0;
+    static const uint32_t g_gnu_build_id = 3; // NT_GNU_BUILD_ID from elf.h
+
+    while (true)
+    {
+        if (data.GetU32 (&offset, &notehdr, 3) == NULL)
+            return false;
+
+        notehdr.name_len = llvm::RoundUpToAlignment (notehdr.name_len, 4);
+        notehdr.desc_len = llvm::RoundUpToAlignment (notehdr.desc_len, 4);
+
+        lldb::offset_t offset_next_note = offset + notehdr.name_len + notehdr.desc_len;
+
+        // 16 bytes is UUID|MD5, 20 bytes is SHA1
+        if ((notehdr.type == g_gnu_build_id) && (notehdr.name_len == 4) &&
+            (notehdr.desc_len == 16 || notehdr.desc_len == 20))
+        {
+            char name[4];
+            if (data.GetU8 (&offset, name, 4) == NULL)
+                return false;
+            if (!strcmp(name, "GNU"))
+            {
+                uint8_t uuidbuf[20]; 
+                if (data.GetU8 (&offset, &uuidbuf, notehdr.desc_len) == NULL)
+                    return false;
+                uuid.SetBytes (uuidbuf, notehdr.desc_len);
+                return true;
+            }
+        }
+        offset = offset_next_note;
+    }
+    return false;
+}
+ 
 SectionList *
 ObjectFileELF::GetSectionList()
 {
@@ -727,8 +779,18 @@
             else if (name == g_sect_name_dwarf_debug_ranges)    sect_type = eSectionTypeDWARFDebugRanges;
             else if (name == g_sect_name_dwarf_debug_str)       sect_type = eSectionTypeDWARFDebugStr;
             else if (name == g_sect_name_eh_frame)              sect_type = eSectionTypeEHFrame;
+            else if (header.sh_type == SHT_NOTE)
+            {
+                if (!m_uuid.IsValid())
+                {
+                    DataExtractor data;
+                    if (vm_size && (GetData (header.sh_offset, vm_size, data) == vm_size))
+                    {
+                        ParseNoteGNUBuildID (data, m_uuid);
+                    }
+                }
+            }
             
-            
             SectionSP section_sp(new Section(
                 GetModule(),        // Module to which this section belongs.
                 SectionIndex(I),    // Section ID.
Index: source/Plugins/ObjectFile/ELF/ObjectFileELF.h
===================================================================
--- source/Plugins/ObjectFile/ELF/ObjectFileELF.h	(revision 182424)
+++ source/Plugins/ObjectFile/ELF/ObjectFileELF.h	(working copy)
@@ -16,6 +16,7 @@
 #include "lldb/lldb-private.h"
 #include "lldb/Host/FileSpec.h"
 #include "lldb/Symbol/ObjectFile.h"
+#include "lldb/Core/UUID.h"
 
 #include "ELFHeader.h"
 
@@ -154,6 +155,9 @@
     /// ELF file header.
     elf::ELFHeader m_header;
 
+    /// ELF build ID
+    lldb_private::UUID m_uuid;
+
     /// Collection of program headers.
     ProgramHeaderColl m_program_headers;




More information about the lldb-commits mailing list