[Lldb-commits] [lldb] r140750 - in /lldb/trunk/source/Plugins/SymbolFile/DWARF: HashedNameToDIE.cpp HashedNameToDIE.h

Wed Sep 28 17:58:12 PDT 2011

Author: gclayton
Date: Wed Sep 28 19:58:11 2011
New Revision: 140750

URL: http://llvm.org/viewvc/llvm-project?rev=140750&view=rev
Log:
Found a great optimization after speaking with Sean Callanan which cleans
up the implementation details of the on disk hash, these changed implement
the changes in the on disk table format.


Modified:
    lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp
    lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h

Modified: lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp?rev=140750&r1=140749&r2=140750&view=diff
==============================================================================

--- lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp (original)
+++ lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp Wed Sep 28 19:58:11 2011
@@ -42,10 +42,6 @@
     s.Printf ("header.version            = 0x%4.4x", version);
     s.Printf ("header.addr_bytesize      = 0x%2.2x", addr_bytesize);
     s.Printf ("header.hash_function      = 0x%2.2x", hash_function);
-    s.Printf ("header.hash_count_bitsize = 0x%2.2x", hash_count_bitsize);
-    s.Printf ("header.hash_index_bitsize = 0x%2.2x", hash_index_bitsize);
-    s.Printf ("header.hash_bytesize      = 0x%2.2x", hash_bytesize);
-    s.Printf ("header.offset_bytesize    = 0x%2.2x", offset_bytesize);
     s.Printf ("header.bucket_count       = 0x%8.8x %u", bucket_count, bucket_count);
     s.Printf ("header.hashes_count       = 0x%8.8x %u", hashes_count, hashes_count);
     s.Printf ("header.prologue_length    = 0x%8.8x %u", prologue_length, prologue_length);
@@ -70,10 +66,6 @@
     }
     addr_bytesize       = data.GetU8  (&offset);
     hash_function       = data.GetU8  (&offset);
-    hash_count_bitsize  = data.GetU8  (&offset);
-    hash_index_bitsize  = data.GetU8  (&offset);
-    hash_bytesize       = data.GetU8  (&offset);
-    offset_bytesize     = data.GetU8  (&offset);
     bucket_count        = data.GetU32 (&offset);
     hashes_count        = data.GetU32 (&offset);
     prologue_length     = data.GetU32 (&offset);
@@ -154,42 +146,38 @@
 {
     if (m_header.version == 1)
     {
-        const size_t initial_size = die_ofsets.size();
         if (name_cstr && name_cstr[0])
         {
             // Hash the C string
             const uint32_t name_hash = dl_new_hash (name_cstr);
             
+            const uint32_t bucket_count = m_header.bucket_count;
             // Find the correct bucket for the using the hash value
-            const uint32_t bucket_idx = name_hash % m_header.bucket_count;
+            const uint32_t bucket_idx = name_hash % bucket_count;
             
             // Calculate the offset for the bucket entry for the bucket index
             uint32_t offset = GetOffsetOfBucketEntry (bucket_idx);
             
-            // Extract the bucket entry.
-            const uint32_t bucket_entry = m_data.GetU32 (&offset);
-            if (bucket_entry)
+            // Extract the bucket entry which is a hash index. If the hash index
+            // is UINT32_MAX, then the bucket is empty. If it isn't, it is the
+            // index of the hash in the hashes array. We will then iterate through
+            // all hashes as long as they match "bucket_idx" which was calculated
+            // above
+            uint32_t hash_idx = m_data.GetU32 (&offset);
+            if (hash_idx != UINT32_MAX)
             {
-                // The bucket entry is non-zero which means it isn't empty.
-                // The bucket entry is made up of a hash index whose bit width
-                // is m_header.hash_index_bitsize, and a hash count whose value
-                // is the remaining bits in the 32 bit value. Below we extract
-                // the hash index and the hash count
-                const uint32_t hash_idx = bucket_entry & GetHashIndexMask();
-                const uint32_t hash_count = bucket_entry >> m_header.hash_index_bitsize;
-                const uint32_t hash_end_idx = hash_idx + hash_count;
-                // Figure out the offset to the hash value by index
                 uint32_t hash_offset = GetOffsetOfHashValue (hash_idx);
-                for (uint32_t idx = hash_idx; idx < hash_end_idx; ++idx)
+                
+                const size_t initial_size = die_ofsets.size();
+                uint32_t hash;
+                while (((hash = m_data.GetU32 (&hash_offset)) % bucket_count) == bucket_idx)
                 {
-                    // Extract the hash value and see if it matches our string
-                    const uint32_t hash = m_data.GetU32 (&hash_offset);
                     if (hash == name_hash)
                     {
                         // The hash matches, but we still need to verify that the
                         // C string matches in case we have a hash collision. Figure
                         // out the offset for the data associated with this hash entry
-                        offset = GetOffsetOfHashDataOffset (idx);
+                        offset = GetOffsetOfHashDataOffset (hash_idx);
                         uint32_t hash_data_offset = m_data.GetU32 (&offset);
                         uint32_t str_offset;
                         // Now we have the offset to the data for all strings that match
@@ -219,10 +207,12 @@
                             }
                         }
                     }
+                    ++hash_idx;
                 }
+                
+                return die_ofsets.size() - initial_size;
             }
         }
-        return die_ofsets.size() - initial_size;
     }
     return 0;
 }
@@ -231,75 +221,67 @@
 HashedNameToDIE::MemoryTable::Dump (Stream &s)
 {
     if (m_header.version == 1)
-    {
-
-        bool verbose = s.GetVerbose();
-        
+    {        
         if (m_is_apple_names)
             s.PutCString (".apple_names contents:\n");
         else
             s.PutCString (".apple_types contents:\n");
         
         m_header.Dump (s);
-        uint32_t i,j,k;
         uint32_t empty_bucket_count = 0;
         uint32_t hash_collisions = 0;
-        uint32_t bucket_entry_offset = GetOffsetOfBucketEntry (0);
-        for (i=0; i<m_header.bucket_count; ++i)
+        uint32_t hash_idx_offset = GetOffsetOfBucketEntry (0);
+        const uint32_t bucket_count = m_header.bucket_count;
+        for (uint32_t bucket_idx=0; bucket_idx<bucket_count; ++bucket_idx)
         {
-            const uint32_t bucket_entry = m_data.GetU32 (&bucket_entry_offset);
-            s.Printf("bucket[%u] 0x%8.8x", i, bucket_entry);
-            if (bucket_entry)
-            {
-                const uint32_t hash_idx = bucket_entry & GetHashIndexMask();
-                const uint32_t hash_count = bucket_entry >> m_header.hash_index_bitsize;
-                
-                s.Printf(" (hash_idx = %u, hash_count = %u)\n", hash_idx, hash_count);
+            uint32_t hash_idx = m_data.GetU32 (&hash_idx_offset);
+            s.Printf("bucket[%u] ", bucket_idx);
+
+            if (hash_idx != UINT32_MAX)
+            {                
+                s.Printf(" => hash[%u]\n", hash_idx);
                 
-                const uint32_t hash_end_idx = hash_idx + hash_count;
                 uint32_t hash_offset = GetOffsetOfHashValue (hash_idx);
                 uint32_t data_offset = GetOffsetOfHashDataOffset (hash_idx);
                 
-                for (j=hash_idx; j<hash_end_idx; ++j)
+                uint32_t hash;
+                while (((hash = m_data.GetU32 (&hash_offset)) % bucket_count) == bucket_idx)
                 {
-                    const uint32_t hash = m_data.GetU32 (&hash_offset);
                     uint32_t hash_data_offset = m_data.GetU32 (&data_offset);
-                    if (verbose)
-                        s.Printf("  hash[%u] = 0x%8.8x, offset[%u] = 0x%8.8x\n", j, hash, j, hash_data_offset);
-                    else
-                        s.Printf("  hash[%u] = 0x%8.8x\n", j, hash);
+                    s.Printf("  hash[%u] = 0x%8.8x\n", hash_idx, hash);
 
-                    uint32_t string_idx = 0;
+                    uint32_t string_count = 0;
                     uint32_t strp_offset;
                     while ((strp_offset = m_data.GetU32 (&hash_data_offset)) != 0)
                     {
                         const uint32_t num_die_offsets = m_data.GetU32 (&hash_data_offset);
-                        s.Printf("    string[%u] = 0x%8.8x \"%s\", dies[%u] = {", 
-                                 string_idx, 
+                        s.Printf("   str[%u] = 0x%8.8x \"%s\", dies[%u] = {", 
+                                 string_count, 
                                  strp_offset, 
                                  m_string_table.PeekCStr(strp_offset),
                                  num_die_offsets);
-                        ++string_idx;
+                        ++string_count;
                         
-                        for (k=0; k<num_die_offsets; ++k)
+                        for (uint32_t die_idx=0; die_idx<num_die_offsets; ++die_idx)
                         {
                             const uint32_t die_offset = m_data.GetU32 (&hash_data_offset);
                             s.Printf(" 0x%8.8x", die_offset);
                         }
                         s.PutCString (" }\n");
                     }
-                    if (string_idx > 1)
+                    if (string_count > 1)
                         ++hash_collisions;
                 }
             }
             else
             {
-                s.PutCString(" (empty)\n");
+                s.PutCString(" EMPTY\n");
                 ++empty_bucket_count;
             }
+            s.EOL();
         }
-        
-        s.Printf ("%u of %u buckets empty (%2.1f%%)\n",  empty_bucket_count, m_header.bucket_count, (((float)empty_bucket_count/(float)m_header.bucket_count)*100.0f));
+        s.EOL();
+        s.Printf ("%u of %u buckets empty (%2.1f%%)\n",  empty_bucket_count, bucket_count, (((float)empty_bucket_count/(float)m_header.bucket_count)*100.0f));
         s.Printf ("Average hashes/non-empty bucket = %2.1f%%\n", ((float)m_header.hashes_count/(float)(m_header.bucket_count - empty_bucket_count)));
         s.Printf ("Hash collisions = %u\n", hash_collisions);
     }

Modified: lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h?rev=140750&r1=140749&r2=140750&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h (original)
+++ lldb/trunk/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h Wed Sep 28 19:58:11 2011
@@ -47,11 +47,6 @@
         uint16_t version;            // Version number
         uint8_t  addr_bytesize;      // Size in bytes of an address
 		uint8_t  hash_function;      // The hash function enumeration that was used
-		uint8_t  hash_count_bitsize; // Size in bits of the hash count in each bucket entry
-		uint8_t  hash_index_bitsize; // Size in bits of the hash index in each bucket entry
-		uint8_t  hash_bytesize;      // Size in bytes of the hash that is stored in the hashes which must be <= 4
-		uint8_t  offset_bytesize;    // Size in bytes of the hash data offsets
-        
 		uint32_t bucket_count;       // The number of buckets in this hash table
 		uint32_t hashes_count;       // The total number of unique hash values and hash data offsets in this table
         uint32_t prologue_length;    // The length of the prologue
@@ -60,11 +55,7 @@
             magic (HASH_MAGIC),
             version (1),
             addr_bytesize (4),
-            hash_count_bitsize (8),
-            hash_index_bitsize (24),
             hash_function (eHashFunctionDJB),
-            hash_bytesize (4),      // Store the entire 32 bit hash by default
-            offset_bytesize (4),    // Store a 4 byte offset for every hash value
             bucket_count (0),
             hashes_count (0),
             prologue_length (_prologue_length)
@@ -83,10 +74,6 @@
             sizeof(version) + 
             sizeof(addr_bytesize) + 
             sizeof(hash_function) +
-            sizeof(hash_count_bitsize) +
-            sizeof(hash_index_bitsize) +
-            sizeof(hash_bytesize) +
-            sizeof(offset_bytesize) +
             sizeof(bucket_count) +
             sizeof(hashes_count) +
             sizeof(prologue_length) +
@@ -236,12 +223,6 @@
         }
         
         uint32_t
-        GetHashIndexMask () const
-        {
-            return (1u << m_header.hash_index_bitsize) - 1u;
-        }
-        
-        uint32_t
         GetOffsetOfBucketEntry (uint32_t idx) const
         {
             if (idx < m_header.bucket_count)