clayborg created this revision.
clayborg added reviewers: labath, aprantl, JDevlieghere, jingham, wallace.
Herald added subscribers: kristof.beyls, emaste.
clayborg requested review of this revision.
Herald added subscribers: lldb-commits, MaskRay.
Herald added a project: LLDB.

This fix was created after profiling the target creation of a large C/C++/ObjC 
application that contained almost 4,000,000 redacted symbol names. The symbol 
table parsing code was creating names for each of these synthetic symbols and 
adding them to the name indexes. The code was also adding the object file 
basename to the end of the symbol name which doesn't allow symbols from 
different shared libraries to share the names in the constant string pool.

Prior to this fix this was creating 180MB of "___lldb_unnamed_symbol" symbol 
names and was taking a long time to generate each name, add them to the string 
pool and then add each of these names to the name index.

This patch fixes the issue by:

- not adding a name to synthetic symbols at creation time, and allows name to 
be dynamically generated when accessed
- doesn't add synthetic symbol names to the name indexes, but catches this 
special case as name lookup time. Users won't typically set breakpoints or 
lookup these synthetic names, but support was added to do the lookup in case it 
does happen
- removes the object file baseanme from the generated names to allow the names 
to be shared in the constant string pool

Prior to this fix the startup times for a large application was:
12.5 seconds (cold file caches)
8.5 seconds (warm file caches)

After this fix:
9.7 seconds (cold file caches)
5.7 seconds (warm file caches)

The names of the symbols are auto generated by appending the symbol's UserID to 
the end of the "___lldb_unnamed_symbol" string and is only done when the name 
is requested from a synthetic symbol if it has no name.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D104488

Files:
  lldb/include/lldb/Symbol/ObjectFile.h
  lldb/include/lldb/Symbol/Symbol.h
  lldb/include/lldb/Symbol/Symtab.h
  lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
  lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
  lldb/source/Symbol/ObjectFile.cpp
  lldb/source/Symbol/Symbol.cpp
  lldb/source/Symbol/Symtab.cpp

Index: lldb/source/Symbol/Symtab.cpp
===================================================================
--- lldb/source/Symbol/Symtab.cpp
+++ lldb/source/Symbol/Symtab.cpp
@@ -291,7 +291,7 @@
       // the trampoline symbols to be searchable by name we can remove this and
       // then possibly add a new bool to any of the Symtab functions that
       // lookup symbols by name to indicate if they want trampolines.
-      if (symbol->IsTrampoline())
+      if (symbol->IsTrampoline() || symbol->IsSynthetic())
         continue;
 
       // If the symbol's name string matched a Mangled::ManglingScheme, it is
@@ -614,6 +614,36 @@
   }
 }
 
+uint32_t Symtab::GetNameIndexes(ConstString symbol_name,
+                                std::vector<uint32_t> &indexes) {
+  auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
+  const uint32_t count = name_to_index.GetValues(symbol_name, indexes);
+  if (count)
+    return count;
+  // Synthetic symbol names are not added to the name indexes, but they start
+  // with a prefix and end with a the symbol UserID, so allow users to find
+  // these without having to add them to the name indexes. This queries will
+  // not happen very often since the names don't mean anything, so performance
+  // is not paramount in this case.
+  llvm::StringRef name = symbol_name.GetStringRef();
+  // String the synthetic prefix if the name starts with it.
+  if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix()))
+    return 0; // Not a synthetic symbol name
+
+  // Extract the user ID from the symbol name
+  user_id_t uid = 0;
+  if (getAsUnsignedInteger(name, 10 /* Radix */, uid))
+    return 0; // Failed to extract the user ID as an integer
+  Symbol *symbol = FindSymbolByID(uid);
+  if (symbol == nullptr)
+    return 0;
+  const uint32_t symbol_idx = GetIndexForSymbol(symbol);
+  if (symbol_idx == UINT32_MAX)
+    return 0;
+  indexes.push_back(symbol_idx);
+  return 1;
+}
+
 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
                                              std::vector<uint32_t> &indexes) {
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
@@ -623,8 +653,7 @@
     if (!m_name_indexes_computed)
       InitNameIndexes();
 
-    auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
-    return name_to_index.GetValues(symbol_name, indexes);
+    return GetNameIndexes(symbol_name, indexes);
   }
   return 0;
 }
@@ -641,10 +670,9 @@
     if (!m_name_indexes_computed)
       InitNameIndexes();
 
-    auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
     std::vector<uint32_t> all_name_indexes;
     const size_t name_match_count =
-        name_to_index.GetValues(symbol_name, all_name_indexes);
+        GetNameIndexes(symbol_name, all_name_indexes);
     for (size_t i = 0; i < name_match_count; ++i) {
       if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type,
                              symbol_visibility))
Index: lldb/source/Symbol/Symbol.cpp
===================================================================
--- lldb/source/Symbol/Symbol.cpp
+++ lldb/source/Symbol/Symbol.cpp
@@ -56,8 +56,8 @@
       m_size_is_synthesized(false),
       m_size_is_valid(size_is_valid || range.GetByteSize() > 0),
       m_demangled_is_synthesized(false),
-      m_contains_linker_annotations(contains_linker_annotations), 
-      m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), 
+      m_contains_linker_annotations(contains_linker_annotations),
+      m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range),
       m_flags(flags) {}
 
 Symbol::Symbol(const Symbol &rhs)
@@ -119,7 +119,7 @@
 }
 
 ConstString Symbol::GetDisplayName() const {
-  return m_mangled.GetDisplayDemangledName();
+  return GetMangled().GetDisplayDemangledName();
 }
 
 ConstString Symbol::GetReExportedSymbolName() const {
@@ -202,7 +202,7 @@
       s->Printf(", value = 0x%16.16" PRIx64,
                 m_addr_range.GetBaseAddress().GetOffset());
   }
-  ConstString demangled = m_mangled.GetDemangledName();
+  ConstString demangled = GetMangled().GetDemangledName();
   if (demangled)
     s->Printf(", name=\"%s\"", demangled.AsCString());
   if (m_mangled.GetMangledName())
@@ -218,7 +218,7 @@
   // Make sure the size of the symbol is up to date before dumping
   GetByteSize();
 
-  ConstString name = m_mangled.GetName(name_preference);
+  ConstString name = GetMangled().GetName(name_preference);
   if (ValueIsAddress()) {
     if (!m_addr_range.GetBaseAddress().Dump(s, nullptr,
                                             Address::DumpStyleFileAddress))
@@ -331,8 +331,8 @@
 
 bool Symbol::Compare(ConstString name, SymbolType type) const {
   if (type == eSymbolTypeAny || m_type == type)
-    return m_mangled.GetMangledName() == name ||
-           m_mangled.GetDemangledName() == name;
+    return GetMangled().GetMangledName() == name ||
+           GetMangled().GetDemangledName() == name;
   return false;
 }
 
@@ -495,10 +495,10 @@
     return LLDB_INVALID_ADDRESS;
 }
 
-ConstString Symbol::GetName() const { return m_mangled.GetName(); }
+ConstString Symbol::GetName() const { return GetMangled().GetName(); }
 
 ConstString Symbol::GetNameNoArguments() const {
-  return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments);
+  return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments);
 }
 
 lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const {
@@ -565,3 +565,12 @@
 bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const {
   return m_addr_range.ContainsFileAddress(file_addr);
 }
+
+void Symbol::SynthesizeNameIfNeeded() const {
+  if (m_is_synthetic && !m_mangled) {
+    llvm::SmallString<256> name;
+    llvm::raw_svector_ostream os(name);
+    os << GetSyntheticSymbolPrefix() << GetID();
+    m_mangled.SetDemangledName(ConstString(os.str()));
+  }
+}
Index: lldb/source/Symbol/ObjectFile.cpp
===================================================================
--- lldb/source/Symbol/ObjectFile.cpp
+++ lldb/source/Symbol/ObjectFile.cpp
@@ -616,16 +616,6 @@
   return symbol_type_hint;
 }
 
-ConstString ObjectFile::GetNextSyntheticSymbolName() {
-  llvm::SmallString<256> name;
-  llvm::raw_svector_ostream os(name);
-  ConstString file_name = GetModule()->GetFileSpec().GetFilename();
-  ++m_synthetic_symbol_idx;
-  os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$"
-     << file_name.GetStringRef();
-  return ConstString(os.str());
-}
-
 std::vector<ObjectFile::LoadableData>
 ObjectFile::GetLoadableData(Target &target) {
   std::vector<LoadableData> loadables;
Index: lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -4695,8 +4695,10 @@
                 symbol_byte_size = section_end_file_addr - symbol_file_addr;
               }
               sym[sym_idx].SetID(synthetic_sym_id++);
-              sym[sym_idx].GetMangled().SetDemangledName(
-                  GetNextSyntheticSymbolName());
+              // Don't set the name for any synthetic symbols, the Symbol
+              // object will generate one if needed when the name is accessed
+              // via accessors.
+              sym[sym_idx].GetMangled().SetDemangledName(ConstString());
               sym[sym_idx].SetType(eSymbolTypeCode);
               sym[sym_idx].SetIsSynthetic(true);
               sym[sym_idx].GetAddressRef() = symbol_addr;
Index: lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1880,7 +1880,7 @@
           unified_section_list.AddSection(symtab_section_sp);
       }
     }
-  }  
+  }
 }
 
 std::shared_ptr<ObjectFileELF> ObjectFileELF::GetGnuDebugDataObjectFile() {
@@ -2813,13 +2813,16 @@
       if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress(
                                       entry_point_file_addr)) {
         uint64_t symbol_id = m_symtab_up->GetNumSymbols();
+        // Don't set the name for any synthetic symbols, the Symbol
+        // object will generate one if needed when the name is accessed
+        // via accessors.
         Symbol symbol(symbol_id,
-                      GetNextSyntheticSymbolName().GetCString(), // Symbol name.
-                      eSymbolTypeCode, // Type of this symbol.
-                      true,            // Is this globally visible?
-                      false,           // Is this symbol debug info?
-                      false,           // Is this symbol a trampoline?
-                      true,            // Is this symbol artificial?
+                      llvm::StringRef(), // Name will be auto generated.
+                      eSymbolTypeCode,   // Type of this symbol.
+                      true,              // Is this globally visible?
+                      false,             // Is this symbol debug info?
+                      false,             // Is this symbol a trampoline?
+                      true,              // Is this symbol artificial?
                       entry_point_addr.GetSection(), // Section where this
                                                      // symbol is defined.
                       0,     // Offset in section or symbol value.
@@ -2917,22 +2920,24 @@
           section_list->FindSectionContainingFileAddress(file_addr);
       if (section_sp) {
         addr_t offset = file_addr - section_sp->GetFileAddress();
-        const char *symbol_name = GetNextSyntheticSymbolName().GetCString();
         uint64_t symbol_id = ++last_symbol_id;
+        // Don't set the name for any synthetic symbols, the Symbol
+        // object will generate one if needed when the name is accessed
+        // via accessors.
         Symbol eh_symbol(
-            symbol_id,       // Symbol table index.
-            symbol_name,     // Symbol name.
-            eSymbolTypeCode, // Type of this symbol.
-            true,            // Is this globally visible?
-            false,           // Is this symbol debug info?
-            false,           // Is this symbol a trampoline?
-            true,            // Is this symbol artificial?
-            section_sp,      // Section in which this symbol is defined or null.
-            offset,          // Offset in section or symbol value.
-            0,     // Size:          Don't specify the size as an FDE can
-            false, // Size is valid: cover multiple symbols.
-            false, // Contains linker annotations?
-            0);    // Symbol flags.
+            symbol_id,         // Symbol table index.
+            llvm::StringRef(), // Name will be auto generated.
+            eSymbolTypeCode,   // Type of this symbol.
+            true,              // Is this globally visible?
+            false,             // Is this symbol debug info?
+            false,             // Is this symbol a trampoline?
+            true,              // Is this symbol artificial?
+            section_sp, // Section in which this symbol is defined or null.
+            offset,     // Offset in section or symbol value.
+            0,          // Size:          Don't specify the size as an FDE can
+            false,      // Size is valid: cover multiple symbols.
+            false,      // Contains linker annotations?
+            0);         // Symbol flags.
         new_symbols.push_back(eh_symbol);
       }
     }
Index: lldb/include/lldb/Symbol/Symtab.h
===================================================================
--- lldb/include/lldb/Symbol/Symtab.h
+++ lldb/include/lldb/Symbol/Symtab.h
@@ -219,6 +219,9 @@
     return false;
   }
 
+  uint32_t GetNameIndexes(ConstString symbol_name,
+                          std::vector<uint32_t> &indexes);
+
   void SymbolIndicesToSymbolContextList(std::vector<uint32_t> &symbol_indexes,
                                         SymbolContextList &sc_list);
 
Index: lldb/include/lldb/Symbol/Symbol.h
===================================================================
--- lldb/include/lldb/Symbol/Symbol.h
+++ lldb/include/lldb/Symbol/Symbol.h
@@ -113,14 +113,20 @@
   lldb::LanguageType GetLanguage() const {
     // TODO: See if there is a way to determine the language for a symbol
     // somehow, for now just return our best guess
-    return m_mangled.GuessLanguage();
+    return GetMangled().GuessLanguage();
   }
 
   void SetID(uint32_t uid) { m_uid = uid; }
 
-  Mangled &GetMangled() { return m_mangled; }
+  Mangled &GetMangled() {
+    SynthesizeNameIfNeeded();
+    return m_mangled;
+  }
 
-  const Mangled &GetMangled() const { return m_mangled; }
+  const Mangled &GetMangled() const {
+    SynthesizeNameIfNeeded();
+    return m_mangled;
+  }
 
   ConstString GetReExportedSymbolName() const;
 
@@ -166,9 +172,9 @@
   bool IsTrampoline() const;
 
   bool IsIndirect() const;
-  
+
   bool IsWeak() const { return m_is_weak; }
-  
+
   void SetIsWeak (bool b) { m_is_weak = b; }
 
   bool GetByteSizeIsValid() const { return m_size_is_valid; }
@@ -223,6 +229,10 @@
 
   bool ContainsFileAddress(lldb::addr_t file_addr) const;
 
+  static llvm::StringRef GetSyntheticSymbolPrefix() {
+    return "___lldb_unnamed_symbol";
+  }
+
 protected:
   // This is the internal guts of ResolveReExportedSymbol, it assumes
   // reexport_name is not null, and that module_spec is valid.  We track the
@@ -233,6 +243,8 @@
       lldb_private::ModuleSpec &module_spec,
       lldb_private::ModuleList &seen_modules) const;
 
+  void SynthesizeNameIfNeeded() const;
+
   uint32_t m_uid =
       UINT32_MAX;           // User ID (usually the original symbol table index)
   uint16_t m_type_data = 0; // data specific to m_type
@@ -258,7 +270,7 @@
                                          // doing name lookups
       m_is_weak : 1,
       m_type : 6;            // Values from the lldb::SymbolType enum.
-  Mangled m_mangled;         // uniqued symbol name/mangled name pair
+  mutable Mangled m_mangled; // uniqued symbol name/mangled name pair
   AddressRange m_addr_range; // Contains the value, or the section offset
                              // address when the value is an address in a
                              // section, and the size (if any)
Index: lldb/include/lldb/Symbol/ObjectFile.h
===================================================================
--- lldb/include/lldb/Symbol/ObjectFile.h
+++ lldb/include/lldb/Symbol/ObjectFile.h
@@ -696,8 +696,6 @@
   ///     false otherwise.
   bool SetModulesArchitecture(const ArchSpec &new_arch);
 
-  ConstString GetNextSyntheticSymbolName();
-
   static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size,
                                         uint64_t Offset);
 
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to