Author: tberghammer Date: Tue Oct 27 05:43:27 2015 New Revision: 251402 URL: http://llvm.org/viewvc/llvm-project?rev=251402&view=rev Log: Some minor improvements on the symtab parsing code
* Remove an unneccessary re-computaion on arch spec from the ELF file * Use a local cache to optimize name based section lookups in symtab parsing * Optimize C++ method basename validation with replacing a regex with hand written code These modifications reduce the time required to parse the symtab from large applications by ~25% (tested with LLDB as inferior) Differential revision: http://reviews.llvm.org/D14088 Modified: lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp Modified: lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp?rev=251402&r1=251401&r2=251402&view=diff ============================================================================== --- lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp (original) +++ lldb/trunk/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp Tue Oct 27 05:43:27 2015 @@ -131,6 +131,49 @@ ReverseFindMatchingChars (const llvm::St return false; } +static bool +IsValidBasename(const llvm::StringRef& basename) +{ + // Check that the basename matches with the following regular expression or is an operator name: + // "^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$" + // We are using a hand written implementation because it is significantly more efficient then + // using the general purpose regular expression library. + size_t idx = 0; + if (basename.size() > 0 && basename[0] == '~') + idx = 1; + + if (basename.size() <= idx) + return false; // Empty string or "~" + + if (!std::isalpha(basename[idx]) && basename[idx] != '_') + return false; // First charater (after removing the possible '~'') isn't in [A-Za-z_] + + // Read all characters matching [A-Za-z_0-9] + ++idx; + while (idx < basename.size()) + { + if (!std::isalnum(basename[idx]) && basename[idx] != '_') + break; + ++idx; + } + + // We processed all characters. It is a vaild basename. + if (idx == basename.size()) + return true; + + // Check for basename with template arguments + // TODO: Improve the quality of the validation with validating the template arguments + if (basename[idx] == '<' && basename.back() == '>') + return true; + + // Check if the basename is a vaild C++ operator name + if (!basename.startswith("operator")) + return false; + + static RegularExpression g_operator_regex("^(operator)( ?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|\\[\\]|[\\^<>=!\\/*+-]+)(<.*>)?(\\[\\])?$"); + std::string basename_str(basename.str()); + return g_operator_regex.Execute(basename_str.c_str(), nullptr); +} void CPlusPlusLanguage::MethodName::Parse() @@ -201,30 +244,8 @@ CPlusPlusLanguage::MethodName::Parse() m_parse_error = true; return; } - -// if (!m_context.empty()) -// printf (" context = '%s'\n", m_context.str().c_str()); -// if (m_basename) -// printf (" basename = '%s'\n", m_basename.GetCString()); -// if (!m_arguments.empty()) -// printf (" arguments = '%s'\n", m_arguments.str().c_str()); -// if (!m_qualifiers.empty()) -// printf ("qualifiers = '%s'\n", m_qualifiers.str().c_str()); - - // Make sure we have a valid C++ basename with optional template args - static RegularExpression g_identifier_regex("^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$"); - std::string basename_str(m_basename.str()); - bool basename_is_valid = g_identifier_regex.Execute (basename_str.c_str(), NULL); - if (!basename_is_valid) - { - // Check for C++ operators - if (m_basename.startswith("operator")) - { - static RegularExpression g_operator_regex("^(operator)( ?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|\\[\\]|[\\^<>=!\\/*+-]+)(<.*>)?(\\[\\])?$"); - basename_is_valid = g_operator_regex.Execute(basename_str.c_str(), NULL); - } - } - if (!basename_is_valid) + + if (!IsValidBasename(m_basename)) { // The C++ basename doesn't match our regular expressions so this can't // be a valid C++ method, clear everything out and indicate an error @@ -238,7 +259,6 @@ CPlusPlusLanguage::MethodName::Parse() else { m_parse_error = true; -// printf ("error: didn't find matching parens for arguments\n"); } } } Modified: lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp?rev=251402&r1=251401&r2=251402&view=diff ============================================================================== --- lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (original) +++ lldb/trunk/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp Tue Oct 27 05:43:27 2015 @@ -11,6 +11,7 @@ #include <cassert> #include <algorithm> +#include <unordered_map> #include "lldb/Core/ArchSpec.h" #include "lldb/Core/DataBuffer.h" @@ -1943,6 +1944,13 @@ ObjectFileELF::ParseSymbols (Symtab *sym // makes it highly unlikely that this will collide with anything else. bool skip_oatdata_oatexec = m_file.GetFilename() == ConstString("system@framew...@boot.oat"); + ArchSpec arch; + GetArchitecture(arch); + + // Local cache to avoid doing a FindSectionByName for each symbol. The "const char*" key must + // came from a ConstString object so they can be compared by pointer + std::unordered_map<const char*, lldb::SectionSP> section_name_to_section; + unsigned i; for (i = 0; i < num_symbols; ++i) { @@ -2048,8 +2056,7 @@ ObjectFileELF::ParseSymbols (Symtab *sym int64_t symbol_value_offset = 0; uint32_t additional_flags = 0; - ArchSpec arch; - if (GetArchitecture(arch)) + if (arch.IsValid()) { if (arch.GetMachine() == llvm::Triple::arm) { @@ -2175,11 +2182,13 @@ ObjectFileELF::ParseSymbols (Symtab *sym if (module_section_list && module_section_list != section_list) { const ConstString §_name = symbol_section_sp->GetName(); - lldb::SectionSP section_sp (module_section_list->FindSectionByName (sect_name)); - if (section_sp && section_sp->GetFileSize()) - { - symbol_section_sp = section_sp; - } + auto section_it = section_name_to_section.find(sect_name.GetCString()); + if (section_it == section_name_to_section.end()) + section_it = section_name_to_section.emplace( + sect_name.GetCString(), + module_section_list->FindSectionByName (sect_name)).first; + if (section_it->second && section_it->second->GetFileSize()) + symbol_section_sp = section_it->second; } } } _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits