[Lldb-commits] [lldb] r251402 - Some minor improvements on the symtab parsing code

Tamas Berghammer via lldb-commits lldb-commits at lists.llvm.org
Tue Oct 27 03:43:28 PDT 2015


Author: tberghammer
Date: Tue Oct 27 05:43:27 2015
New Revision: 251402

URL: http://llvm.org/viewvc/llvm-project?rev=251402&view=rev
Log:
Some minor improvements on the symtab parsing code

* Remove an unneccessary re-computaion on arch spec from the ELF file
* Use a local cache to optimize name based section lookups in symtab
  parsing
* Optimize C++ method basename validation with replacing a regex with
  hand written code

These modifications reduce the time required to parse the symtab from
large applications by ~25% (tested with LLDB as inferior)

Differential revision: http://reviews.llvm.org/D14088

Modified:
    lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
    lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp

Modified: lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp?rev=251402&r1=251401&r2=251402&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp (original)
+++ lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp Tue Oct 27 05:43:27 2015
@@ -131,6 +131,49 @@ ReverseFindMatchingChars (const llvm::St
     return false;
 }
 
+static bool
+IsValidBasename(const llvm::StringRef& basename)
+{
+    // Check that the basename matches with the following regular expression or is an operator name:
+    // "^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$"
+    // We are using a hand written implementation because it is significantly more efficient then
+    // using the general purpose regular expression library.
+    size_t idx = 0;
+    if (basename.size() > 0 && basename[0] == '~')
+        idx = 1;
+
+    if (basename.size() <= idx)
+        return false; // Empty string or "~"
+
+    if (!std::isalpha(basename[idx]) && basename[idx] != '_')
+        return false; // First charater (after removing the possible '~'') isn't in [A-Za-z_]
+
+    // Read all characters matching [A-Za-z_0-9]
+    ++idx;
+    while (idx < basename.size())
+    {
+        if (!std::isalnum(basename[idx]) && basename[idx] != '_')
+            break;
+        ++idx;
+    }
+
+    // We processed all characters. It is a vaild basename.
+    if (idx == basename.size())
+        return true;
+
+    // Check for basename with template arguments
+    // TODO: Improve the quality of the validation with validating the template arguments
+    if (basename[idx] == '<' && basename.back() == '>')
+        return true;
+
+    // Check if the basename is a vaild C++ operator name
+    if (!basename.startswith("operator"))
+        return false;
+
+    static RegularExpression g_operator_regex("^(operator)( ?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|\\[\\]|[\\^<>=!\\/*+-]+)(<.*>)?(\\[\\])?$");
+    std::string basename_str(basename.str());
+    return g_operator_regex.Execute(basename_str.c_str(), nullptr);
+}
 
 void
 CPlusPlusLanguage::MethodName::Parse()
@@ -201,30 +244,8 @@ CPlusPlusLanguage::MethodName::Parse()
                 m_parse_error = true;
                 return;
             }
-        
-//            if (!m_context.empty())
-//                printf ("   context = '%s'\n", m_context.str().c_str());
-//            if (m_basename)
-//                printf ("  basename = '%s'\n", m_basename.GetCString());
-//            if (!m_arguments.empty())
-//                printf (" arguments = '%s'\n", m_arguments.str().c_str());
-//            if (!m_qualifiers.empty())
-//                printf ("qualifiers = '%s'\n", m_qualifiers.str().c_str());
-
-            // Make sure we have a valid C++ basename with optional template args
-            static RegularExpression g_identifier_regex("^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$");
-            std::string basename_str(m_basename.str());
-            bool basename_is_valid = g_identifier_regex.Execute (basename_str.c_str(), NULL);
-            if (!basename_is_valid)
-            {
-                // Check for C++ operators
-                if (m_basename.startswith("operator"))
-                {
-                    static RegularExpression g_operator_regex("^(operator)( ?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|\\[\\]|[\\^<>=!\\/*+-]+)(<.*>)?(\\[\\])?$");
-                    basename_is_valid = g_operator_regex.Execute(basename_str.c_str(), NULL);
-                }
-            }
-            if (!basename_is_valid)
+
+            if (!IsValidBasename(m_basename))
             {
                 // The C++ basename doesn't match our regular expressions so this can't
                 // be a valid C++ method, clear everything out and indicate an error
@@ -238,7 +259,6 @@ CPlusPlusLanguage::MethodName::Parse()
         else
         {
             m_parse_error = true;
-//            printf ("error: didn't find matching parens for arguments\n");
         }
     }
 }

Modified: lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp?rev=251402&r1=251401&r2=251402&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (original)
+++ lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp Tue Oct 27 05:43:27 2015
@@ -11,6 +11,7 @@
 
 #include <cassert>
 #include <algorithm>
+#include <unordered_map>
 
 #include "lldb/Core/ArchSpec.h"
 #include "lldb/Core/DataBuffer.h"
@@ -1943,6 +1944,13 @@ ObjectFileELF::ParseSymbols (Symtab *sym
     // makes it highly unlikely that this will collide with anything else.
     bool skip_oatdata_oatexec = m_file.GetFilename() == ConstString("system at framework@boot.oat");
 
+    ArchSpec arch;
+    GetArchitecture(arch);
+
+    // Local cache to avoid doing a FindSectionByName for each symbol. The "const char*" key must
+    // came from a ConstString object so they can be compared by pointer
+    std::unordered_map<const char*, lldb::SectionSP> section_name_to_section;
+
     unsigned i;
     for (i = 0; i < num_symbols; ++i)
     {
@@ -2048,8 +2056,7 @@ ObjectFileELF::ParseSymbols (Symtab *sym
         int64_t symbol_value_offset = 0;
         uint32_t additional_flags = 0;
 
-        ArchSpec arch;
-        if (GetArchitecture(arch))
+        if (arch.IsValid())
         {
             if (arch.GetMachine() == llvm::Triple::arm)
             {
@@ -2175,11 +2182,13 @@ ObjectFileELF::ParseSymbols (Symtab *sym
                 if (module_section_list && module_section_list != section_list)
                 {
                     const ConstString &sect_name = symbol_section_sp->GetName();
-                    lldb::SectionSP section_sp (module_section_list->FindSectionByName (sect_name));
-                    if (section_sp && section_sp->GetFileSize())
-                    {
-                        symbol_section_sp = section_sp;
-                    }
+                    auto section_it = section_name_to_section.find(sect_name.GetCString());
+                    if (section_it == section_name_to_section.end())
+                        section_it = section_name_to_section.emplace(
+                            sect_name.GetCString(),
+                            module_section_list->FindSectionByName (sect_name)).first;
+                    if (section_it->second && section_it->second->GetFileSize())
+                        symbol_section_sp = section_it->second;
                 }
             }
         }




More information about the lldb-commits mailing list