clayborg updated this revision to Diff 353715.
clayborg added a comment.

Fixed comments per Jim Ingham's comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104488/new/

https://reviews.llvm.org/D104488

Files:
  lldb/include/lldb/Symbol/ObjectFile.h
  lldb/include/lldb/Symbol/Symbol.h
  lldb/include/lldb/Symbol/Symtab.h
  lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
  lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
  lldb/source/Symbol/ObjectFile.cpp
  lldb/source/Symbol/Symbol.cpp
  lldb/source/Symbol/Symtab.cpp

Index: lldb/source/Symbol/Symtab.cpp
===================================================================
--- lldb/source/Symbol/Symtab.cpp
+++ lldb/source/Symbol/Symtab.cpp
@@ -295,7 +295,7 @@
       // the trampoline symbols to be searchable by name we can remove this and
       // then possibly add a new bool to any of the Symtab functions that
       // lookup symbols by name to indicate if they want trampolines.
-      if (symbol->IsTrampoline())
+      if (symbol->IsTrampoline() || symbol->IsSynthetic())
         continue;
 
       // If the symbol's name string matched a Mangled::ManglingScheme, it is
@@ -618,6 +618,36 @@
   }
 }
 
+uint32_t Symtab::GetNameIndexes(ConstString symbol_name,
+                                std::vector<uint32_t> &indexes) {
+  auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
+  const uint32_t count = name_to_index.GetValues(symbol_name, indexes);
+  if (count)
+    return count;
+  // Synthetic symbol names are not added to the name indexes, but they start
+  // with a prefix and end with a the symbol UserID. This allows users to find
+  // these symbols without having to add them to the name indexes. These
+  // queries will not happen very often since the names don't mean anything, so
+  // performance is not paramount in this case.
+  llvm::StringRef name = symbol_name.GetStringRef();
+  // String the synthetic prefix if the name starts with it.
+  if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix()))
+    return 0; // Not a synthetic symbol name
+
+  // Extract the user ID from the symbol name
+  user_id_t uid = 0;
+  if (getAsUnsignedInteger(name, /*Radix=*/10, uid))
+    return 0; // Failed to extract the user ID as an integer
+  Symbol *symbol = FindSymbolByID(uid);
+  if (symbol == nullptr)
+    return 0;
+  const uint32_t symbol_idx = GetIndexForSymbol(symbol);
+  if (symbol_idx == UINT32_MAX)
+    return 0;
+  indexes.push_back(symbol_idx);
+  return 1;
+}
+
 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
                                              std::vector<uint32_t> &indexes) {
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
@@ -627,8 +657,7 @@
     if (!m_name_indexes_computed)
       InitNameIndexes();
 
-    auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
-    return name_to_index.GetValues(symbol_name, indexes);
+    return GetNameIndexes(symbol_name, indexes);
   }
   return 0;
 }
@@ -645,10 +674,9 @@
     if (!m_name_indexes_computed)
       InitNameIndexes();
 
-    auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
     std::vector<uint32_t> all_name_indexes;
     const size_t name_match_count =
-        name_to_index.GetValues(symbol_name, all_name_indexes);
+        GetNameIndexes(symbol_name, all_name_indexes);
     for (size_t i = 0; i < name_match_count; ++i) {
       if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type,
                              symbol_visibility))
Index: lldb/source/Symbol/Symbol.cpp
===================================================================
--- lldb/source/Symbol/Symbol.cpp
+++ lldb/source/Symbol/Symbol.cpp
@@ -56,8 +56,8 @@
       m_size_is_synthesized(false),
       m_size_is_valid(size_is_valid || range.GetByteSize() > 0),
       m_demangled_is_synthesized(false),
-      m_contains_linker_annotations(contains_linker_annotations), 
-      m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), 
+      m_contains_linker_annotations(contains_linker_annotations),
+      m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range),
       m_flags(flags) {}
 
 Symbol::Symbol(const Symbol &rhs)
@@ -119,7 +119,7 @@
 }
 
 ConstString Symbol::GetDisplayName() const {
-  return m_mangled.GetDisplayDemangledName();
+  return GetMangled().GetDisplayDemangledName();
 }
 
 ConstString Symbol::GetReExportedSymbolName() const {
@@ -202,7 +202,7 @@
       s->Printf(", value = 0x%16.16" PRIx64,
                 m_addr_range.GetBaseAddress().GetOffset());
   }
-  ConstString demangled = m_mangled.GetDemangledName();
+  ConstString demangled = GetMangled().GetDemangledName();
   if (demangled)
     s->Printf(", name=\"%s\"", demangled.AsCString());
   if (m_mangled.GetMangledName())
@@ -218,7 +218,7 @@
   // Make sure the size of the symbol is up to date before dumping
   GetByteSize();
 
-  ConstString name = m_mangled.GetName(name_preference);
+  ConstString name = GetMangled().GetName(name_preference);
   if (ValueIsAddress()) {
     if (!m_addr_range.GetBaseAddress().Dump(s, nullptr,
                                             Address::DumpStyleFileAddress))
@@ -330,9 +330,11 @@
 }
 
 bool Symbol::Compare(ConstString name, SymbolType type) const {
-  if (type == eSymbolTypeAny || m_type == type)
-    return m_mangled.GetMangledName() == name ||
-           m_mangled.GetDemangledName() == name;
+  if (type == eSymbolTypeAny || m_type == type) {
+    const Mangled &mangled = GetMangled();
+    return mangled.GetMangledName() == name ||
+           mangled.GetDemangledName() == name;
+  }
   return false;
 }
 
@@ -495,10 +497,10 @@
     return LLDB_INVALID_ADDRESS;
 }
 
-ConstString Symbol::GetName() const { return m_mangled.GetName(); }
+ConstString Symbol::GetName() const { return GetMangled().GetName(); }
 
 ConstString Symbol::GetNameNoArguments() const {
-  return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments);
+  return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments);
 }
 
 lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const {
@@ -565,3 +567,21 @@
 bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const {
   return m_addr_range.ContainsFileAddress(file_addr);
 }
+
+void Symbol::SynthesizeNameIfNeeded() const {
+  if (m_is_synthetic && !m_mangled) {
+    // Synthetic symbol names don't mean anything, but they do uniquely
+    // identify individual symbols so we give them a unique name. The name
+    // starts with the synthetic symbol prefix, followed by a unique number.
+    // Typically the UserID of a real symbol is the symbol table index of the
+    // symbol in the object file's symbol table(s), so it will be the same
+    // every time you read in the object file. We want the same persistence for
+    // synthetic symbols so that users can identify them across multiple debug
+    // sessions, to understand crashes in those symbols and to reliably set
+    // breakpoints on them.
+    llvm::SmallString<256> name;
+    llvm::raw_svector_ostream os(name);
+    os << GetSyntheticSymbolPrefix() << GetID();
+    m_mangled.SetDemangledName(ConstString(os.str()));
+  }
+}
Index: lldb/source/Symbol/ObjectFile.cpp
===================================================================
--- lldb/source/Symbol/ObjectFile.cpp
+++ lldb/source/Symbol/ObjectFile.cpp
@@ -616,16 +616,6 @@
   return symbol_type_hint;
 }
 
-ConstString ObjectFile::GetNextSyntheticSymbolName() {
-  llvm::SmallString<256> name;
-  llvm::raw_svector_ostream os(name);
-  ConstString file_name = GetModule()->GetFileSpec().GetFilename();
-  ++m_synthetic_symbol_idx;
-  os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$"
-     << file_name.GetStringRef();
-  return ConstString(os.str());
-}
-
 std::vector<ObjectFile::LoadableData>
 ObjectFile::GetLoadableData(Target &target) {
   std::vector<LoadableData> loadables;
Index: lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -4696,8 +4696,10 @@
                 symbol_byte_size = section_end_file_addr - symbol_file_addr;
               }
               sym[sym_idx].SetID(synthetic_sym_id++);
-              sym[sym_idx].GetMangled().SetDemangledName(
-                  GetNextSyntheticSymbolName());
+              // Don't set the name for any synthetic symbols, the Symbol
+              // object will generate one if needed when the name is accessed
+              // via accessors.
+              sym[sym_idx].GetMangled().SetDemangledName(ConstString());
               sym[sym_idx].SetType(eSymbolTypeCode);
               sym[sym_idx].SetIsSynthetic(true);
               sym[sym_idx].GetAddressRef() = symbol_addr;
Index: lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1880,7 +1880,7 @@
           unified_section_list.AddSection(symtab_section_sp);
       }
     }
-  }  
+  }
 }
 
 std::shared_ptr<ObjectFileELF> ObjectFileELF::GetGnuDebugDataObjectFile() {
@@ -2813,20 +2813,24 @@
       if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress(
                                       entry_point_file_addr)) {
         uint64_t symbol_id = m_symtab_up->GetNumSymbols();
-        Symbol symbol(symbol_id,
-                      GetNextSyntheticSymbolName().GetCString(), // Symbol name.
-                      eSymbolTypeCode, // Type of this symbol.
-                      true,            // Is this globally visible?
-                      false,           // Is this symbol debug info?
-                      false,           // Is this symbol a trampoline?
-                      true,            // Is this symbol artificial?
-                      entry_point_addr.GetSection(), // Section where this
-                                                     // symbol is defined.
-                      0,     // Offset in section or symbol value.
-                      0,     // Size.
-                      false, // Size is valid.
-                      false, // Contains linker annotations?
-                      0);    // Symbol flags.
+        // Don't set the name for any synthetic symbols, the Symbol
+        // object will generate one if needed when the name is accessed
+        // via accessors.
+        SectionSP section_sp = entry_point_addr.GetSection();
+        Symbol symbol(
+            /*symID=*/symbol_id,
+            /*name=*/llvm::StringRef(), // Name will be auto generated.
+            /*type=*/eSymbolTypeCode,
+            /*external=*/true,
+            /*is_debug=*/false,
+            /*is_trampoline=*/false,
+            /*is_artificial=*/true,
+            /*section_sp=*/section_sp,
+            /*offset=*/entry_point_addr.GetOffset(),
+            /*size=*/0, // FDE can span multiple symbols so don't use its size.
+            /*size_is_valid=*/false,
+            /*contains_linker_annotations=*/false,
+            /*flags=*/0);
         m_symtab_up->AddSymbol(symbol);
         // When the entry point is arm thumb we need to explicitly set its
         // class address to reflect that. This is important because expression
@@ -2917,22 +2921,24 @@
           section_list->FindSectionContainingFileAddress(file_addr);
       if (section_sp) {
         addr_t offset = file_addr - section_sp->GetFileAddress();
-        const char *symbol_name = GetNextSyntheticSymbolName().GetCString();
         uint64_t symbol_id = ++last_symbol_id;
+        // Don't set the name for any synthetic symbols, the Symbol
+        // object will generate one if needed when the name is accessed
+        // via accessors.
         Symbol eh_symbol(
-            symbol_id,       // Symbol table index.
-            symbol_name,     // Symbol name.
-            eSymbolTypeCode, // Type of this symbol.
-            true,            // Is this globally visible?
-            false,           // Is this symbol debug info?
-            false,           // Is this symbol a trampoline?
-            true,            // Is this symbol artificial?
-            section_sp,      // Section in which this symbol is defined or null.
-            offset,          // Offset in section or symbol value.
-            0,     // Size:          Don't specify the size as an FDE can
-            false, // Size is valid: cover multiple symbols.
-            false, // Contains linker annotations?
-            0);    // Symbol flags.
+            /*symID=*/symbol_id,
+            /*name=*/llvm::StringRef(), // Name will be auto generated.
+            /*type=*/eSymbolTypeCode,
+            /*external=*/true,
+            /*is_debug=*/false,
+            /*is_trampoline=*/false,
+            /*is_artificial=*/true,
+            /*section_sp=*/section_sp,
+            /*offset=*/offset,
+            /*size=*/0, // FDE can span multiple symbols so don't use its size.
+            /*size_is_valid=*/false,
+            /*contains_linker_annotations=*/false,
+            /*flags=*/0);
         new_symbols.push_back(eh_symbol);
       }
     }
Index: lldb/include/lldb/Symbol/Symtab.h
===================================================================
--- lldb/include/lldb/Symbol/Symtab.h
+++ lldb/include/lldb/Symbol/Symtab.h
@@ -219,6 +219,26 @@
     return false;
   }
 
+  /// A helper function that looks up full function names.
+  ///
+  /// We generate unique names for synthetic symbols so that users can look
+  /// them up by name when needed. But because doing so is uncommon in normal
+  /// debugger use, we trade off some performance at lookup time for faster
+  /// symbol table building by detecting these symbols and generating their
+  /// names lazily, rather than adding them to the normal symbol indexes. This
+  /// function does the job of first consulting the name indexes, and if that
+  /// fails it extracts the information it needs from the synthetic name and
+  /// locates the symbol.
+  ///
+  /// @param[in] symbol_name The symbol name to search for.
+  ///
+  /// @param[out] indexes The vector if symbol indexes to update with results.
+  ///
+  /// @returns The number of indexes added to the index vector. Zero if no
+  /// matches were found.
+  uint32_t GetNameIndexes(ConstString symbol_name,
+                          std::vector<uint32_t> &indexes);
+
   void SymbolIndicesToSymbolContextList(std::vector<uint32_t> &symbol_indexes,
                                         SymbolContextList &sc_list);
 
Index: lldb/include/lldb/Symbol/Symbol.h
===================================================================
--- lldb/include/lldb/Symbol/Symbol.h
+++ lldb/include/lldb/Symbol/Symbol.h
@@ -113,14 +113,20 @@
   lldb::LanguageType GetLanguage() const {
     // TODO: See if there is a way to determine the language for a symbol
     // somehow, for now just return our best guess
-    return m_mangled.GuessLanguage();
+    return GetMangled().GuessLanguage();
   }
 
   void SetID(uint32_t uid) { m_uid = uid; }
 
-  Mangled &GetMangled() { return m_mangled; }
+  Mangled &GetMangled() {
+    SynthesizeNameIfNeeded();
+    return m_mangled;
+  }
 
-  const Mangled &GetMangled() const { return m_mangled; }
+  const Mangled &GetMangled() const {
+    SynthesizeNameIfNeeded();
+    return m_mangled;
+  }
 
   ConstString GetReExportedSymbolName() const;
 
@@ -166,9 +172,9 @@
   bool IsTrampoline() const;
 
   bool IsIndirect() const;
-  
+
   bool IsWeak() const { return m_is_weak; }
-  
+
   void SetIsWeak (bool b) { m_is_weak = b; }
 
   bool GetByteSizeIsValid() const { return m_size_is_valid; }
@@ -223,6 +229,10 @@
 
   bool ContainsFileAddress(lldb::addr_t file_addr) const;
 
+  static llvm::StringRef GetSyntheticSymbolPrefix() {
+    return "___lldb_unnamed_symbol";
+  }
+
 protected:
   // This is the internal guts of ResolveReExportedSymbol, it assumes
   // reexport_name is not null, and that module_spec is valid.  We track the
@@ -233,6 +243,8 @@
       lldb_private::ModuleSpec &module_spec,
       lldb_private::ModuleList &seen_modules) const;
 
+  void SynthesizeNameIfNeeded() const;
+
   uint32_t m_uid =
       UINT32_MAX;           // User ID (usually the original symbol table index)
   uint16_t m_type_data = 0; // data specific to m_type
@@ -258,7 +270,7 @@
                                          // doing name lookups
       m_is_weak : 1,
       m_type : 6;            // Values from the lldb::SymbolType enum.
-  Mangled m_mangled;         // uniqued symbol name/mangled name pair
+  mutable Mangled m_mangled; // uniqued symbol name/mangled name pair
   AddressRange m_addr_range; // Contains the value, or the section offset
                              // address when the value is an address in a
                              // section, and the size (if any)
Index: lldb/include/lldb/Symbol/ObjectFile.h
===================================================================
--- lldb/include/lldb/Symbol/ObjectFile.h
+++ lldb/include/lldb/Symbol/ObjectFile.h
@@ -712,8 +712,6 @@
   ///     false otherwise.
   bool SetModulesArchitecture(const ArchSpec &new_arch);
 
-  ConstString GetNextSyntheticSymbolName();
-
   static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size,
                                         uint64_t Offset);
 
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to