jingham created this revision.
jingham added reviewers: JDevlieghere, labath, clayborg, shafik.
Herald added a subscriber: mgorny.
Herald added a project: All.
jingham requested review of this revision.
Herald added a project: LLDB.
Herald added a subscriber: lldb-commits.

We don't require users to type out the full context of a function, for symbol 
name matches.  Instead, we extract the incoming path's base name, look up all 
the symbols with that base name, and then compare the rest of the context that 
the user provided to make sure it matches.  However, we do this comparison 
using just a strstr.  So for instance:

break set -n foo::bar

will match not only "a::foo::bar" but "notherfoo::bar".  The former is pretty 
clearly the user's intent, but I don't think the latter is, and results in 
breakpoints picking up too many matches.

This change adds a Language::DemangledNameContainsPath API which can do a 
language aware match against the path provided.  If the language doesn't 
provide this we fall back to the strstr (though that's changed to 
StringRef::contains in the patch).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D124579

Files:
  lldb/include/lldb/Target/Language.h
  lldb/source/Core/Module.cpp
  lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
  lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
  lldb/source/Target/Language.cpp
  lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py
  lldb/test/API/functionalities/breakpoint/cpp/main.cpp
  lldb/test/API/functionalities/return-value/TestReturnValue.py
  lldb/test/API/macosx/indirect_symbol/TestIndirectSymbols.py
  lldb/tools/CMakeLists.txt
  lldb/tools/lldb-instr/CMakeLists.txt
  lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp

Index: lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
===================================================================
--- lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
+++ lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
@@ -123,6 +123,45 @@
   }
 }
 
+TEST(CPlusPlusLanguage, ContainsPath) {
+  CPlusPlusLanguage::MethodName 
+      reference_1(ConstString("foo::bar::func01(int a, double b)"));
+  CPlusPlusLanguage::MethodName
+      reference_2(ConstString("foofoo::bar::func01(std::string a, int b)"));
+  CPlusPlusLanguage::MethodName reference_3(ConstString("func01()"));
+  
+  struct TestCase {
+    std::string path;
+    bool result;
+  };
+  TestCase test_cases_1[] = {
+    {"func01", true},
+    {"bar::func01", true},
+    {"foo::bar::func01", true},
+    {"func", false},
+    {"baz::func01", false},
+    {"::bar::func01", false},
+    {"::foo::baz::func01", false},
+    {"foo::bar::baz::func01", false}
+  };
+  TestCase test_cases_2[] {
+    {"foofoo::bar::func01", true},
+    {"foo::bar::func01", false}
+  };
+  TestCase test_cases_3[] {
+    {"func01", true},
+    {"func", false},
+    {"bar::func01", false},
+  };
+
+  for (auto test : test_cases_1) {
+    EXPECT_EQ(reference_1.ContainsPath(test.path), test.result);
+  }
+  for (auto test : test_cases_2) {
+    EXPECT_EQ(reference_2.ContainsPath(test.path), test.result);
+  }
+}
+
 TEST(CPlusPlusLanguage, ExtractContextAndIdentifier) {
   struct TestCase {
     std::string input;
Index: lldb/tools/lldb-instr/CMakeLists.txt
===================================================================
--- lldb/tools/lldb-instr/CMakeLists.txt
+++ lldb/tools/lldb-instr/CMakeLists.txt
@@ -9,7 +9,7 @@
     clangLex
     clangRewrite
     clangSerialization
-    clangTooling
+    clangToolingCore
 
   LINK_COMPONENTS
     Support
Index: lldb/tools/CMakeLists.txt
===================================================================
--- lldb/tools/CMakeLists.txt
+++ lldb/tools/CMakeLists.txt
@@ -8,7 +8,7 @@
 add_subdirectory(lldb-test EXCLUDE_FROM_ALL)
 add_subdirectory(lldb-fuzzer EXCLUDE_FROM_ALL)
 
-add_lldb_tool_subdirectory(lldb-instr)
+#add_lldb_tool_subdirectory(lldb-instr)
 add_lldb_tool_subdirectory(lldb-vscode)
 
 if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
Index: lldb/test/API/macosx/indirect_symbol/TestIndirectSymbols.py
===================================================================
--- lldb/test/API/macosx/indirect_symbol/TestIndirectSymbols.py
+++ lldb/test/API/macosx/indirect_symbol/TestIndirectSymbols.py
@@ -98,7 +98,7 @@
         # make sure we are again in out target function.
         break_reexported = target.BreakpointCreateByName(
             "reexport_to_indirect")
-        self.assertTrue(break_reexported, VALID_BREAKPOINT)
+        self.assertEqual(break_reexported.GetNumLocations(), 1, VALID_BREAKPOINT)
 
         # Now continue should take us to the second call through the indirect
         # symbol:
Index: lldb/test/API/functionalities/return-value/TestReturnValue.py
===================================================================
--- lldb/test/API/functionalities/return-value/TestReturnValue.py
+++ lldb/test/API/functionalities/return-value/TestReturnValue.py
@@ -237,7 +237,7 @@
 
         # Set the breakpoint, run to it, finish out.
         bkpt = self.target.BreakpointCreateByName(func_name)
-        self.assertTrue(bkpt.GetNumResolvedLocations() > 0)
+        self.assertTrue(bkpt.GetNumResolvedLocations() > 0, "Got wrong number of locations for {0}".format(func_name))
 
         self.process.Continue()
 
Index: lldb/test/API/functionalities/breakpoint/cpp/main.cpp
===================================================================
--- lldb/test/API/functionalities/breakpoint/cpp/main.cpp
+++ lldb/test/API/functionalities/breakpoint/cpp/main.cpp
@@ -24,6 +24,29 @@
     c::~c() {}
 }
 
+namespace aa {
+    class cc {
+    public:
+        cc();
+        ~cc();
+        void func1() 
+        {
+            puts (__PRETTY_FUNCTION__);
+        }
+        void func2() 
+        {
+            puts (__PRETTY_FUNCTION__);
+        }
+        void func3() 
+        {
+            puts (__PRETTY_FUNCTION__);
+        }
+    };
+
+    cc::cc() {}
+    cc::~cc() {}
+}
+
 namespace b {
     class c {
     public:
@@ -62,11 +85,15 @@
 int main (int argc, char const *argv[])
 {
     a::c ac;
+    aa::cc aac;
     b::c bc;
     c::d cd;
     ac.func1();
     ac.func2();
     ac.func3();
+    aac.func1();
+    aac.func2();
+    aac.func3();
     bc.func1();
     bc.func3();
     cd.func2();
Index: lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py
===================================================================
--- lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py
+++ lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py
@@ -28,7 +28,7 @@
         self.assertEquals(
             bp.GetNumLocations(),
             len(names),
-            "Make sure we find the right number of breakpoint locations")
+            "Make sure we find the right number of breakpoint locations for {}".format(name))
 
         bp_loc_names = list()
         for bp_loc in bp:
@@ -47,9 +47,9 @@
         target = self.dbg.CreateTarget(exe)
         self.assertTrue(target, VALID_TARGET)
         bp_dicts = [
-            {'name': 'func1', 'loc_names': ['a::c::func1()', 'b::c::func1()']},
-            {'name': 'func2', 'loc_names': ['a::c::func2()', 'c::d::func2()']},
-            {'name': 'func3', 'loc_names': ['a::c::func3()', 'b::c::func3()', 'c::d::func3()']},
+            {'name': 'func1', 'loc_names': ['a::c::func1()', 'aa::cc::func1()', 'b::c::func1()']},
+            {'name': 'func2', 'loc_names': ['a::c::func2()', 'aa::cc::func2()', 'c::d::func2()']},
+            {'name': 'func3', 'loc_names': ['a::c::func3()', 'aa::cc::func3()', 'b::c::func3()', 'c::d::func3()']},
             {'name': 'c::func1', 'loc_names': ['a::c::func1()', 'b::c::func1()']},
             {'name': 'c::func2', 'loc_names': ['a::c::func2()']},
             {'name': 'c::func3', 'loc_names': ['a::c::func3()', 'b::c::func3()']},
Index: lldb/source/Target/Language.cpp
===================================================================
--- lldb/source/Target/Language.cpp
+++ lldb/source/Target/Language.cpp
@@ -428,6 +428,14 @@
   return false;
 }
 
+bool Language::DemangledNameContainsPath(llvm::StringRef path, 
+                                         ConstString demangled) const {
+  // The base implementation does a simple contains comparision:
+  if (path.empty())
+    return false;
+  return demangled.GetStringRef().contains(path);                                         
+}
+
 DumpValueObjectOptions::DeclPrintingHelper Language::GetDeclPrintingHelper() {
   return nullptr;
 }
Index: lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
===================================================================
--- lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
+++ lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
@@ -55,6 +55,8 @@
     llvm::StringRef GetArguments();
 
     llvm::StringRef GetQualifiers();
+    
+    bool ContainsPath(llvm::StringRef path);
 
   protected:
     void Parse();
@@ -105,6 +107,10 @@
   static llvm::StringRef GetPluginNameStatic() { return "cplusplus"; }
 
   bool SymbolNameFitsToLanguage(Mangled mangled) const override;
+  
+  bool DemangledNameContainsPath(llvm::StringRef path, 
+                                 ConstString demangled) const override;
+
 
   ConstString
   GetDemangledFunctionNameWithoutArguments(Mangled mangled) const override;
Index: lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
===================================================================
--- lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -268,6 +268,47 @@
   return res;
 }
 
+bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) {
+  if (!m_parsed)
+    Parse();
+  // If we can't parse the incoming name, then just check that it contains path.
+  if (m_parse_error)
+    return m_full.GetStringRef().contains(path);
+    
+  llvm::StringRef identifier;
+  llvm::StringRef context;
+  std::string path_str = path.str();
+  bool success 
+      = CPlusPlusLanguage::ExtractContextAndIdentifier(path_str.c_str(),
+                                                       context,
+                                                       identifier);
+  if (!success)
+    return m_full.GetStringRef().contains(path);
+
+  if (identifier != GetBasename())
+    return false;
+  // Incoming path only had an identifier, so we match.
+  if (context.empty())
+    return true;
+  // Incoming path has context but this method does not, no match.
+  if (m_context.empty())
+    return false;
+
+  // Make sure the incoming path's context is found in the method context:
+  size_t path_pos = m_context.rfind(context);
+  if (path_pos == llvm::StringRef::npos)
+      return false;
+
+  if (path_pos + context.size() != m_context.size())
+    return false;
+
+  // Make sure it's at a separator boundary:
+  if (path_pos == 0 || !std::isalnum(m_context[path_pos - 1]))
+    return true;
+    
+  return false;
+}
+
 bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) {
   // FIXME!! we should really run through all the known C++ Language plugins
   // and ask each one if this is a C++ mangled name
@@ -280,6 +321,44 @@
   return true;
 }
 
+bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, 
+                                                  ConstString demangled) const {
+
+  MethodName demangled_name(demangled);
+  return demangled_name.ContainsPath(path);
+
+//  size_t from = 0;
+//  llvm::StringRef demangled_ref = demangled.GetStringRef();
+//  size_t dem_len = demangled_ref.size();
+//  size_t path_len = path.size();
+//  
+//  // The path string might appear more than once in the demangled string.  A
+//  // silly example is:
+//  // ::A::AA::A::A() 
+//  // If the user string was A::A, the first match is not on word boundaries, 
+//  // so is not a match, but the second match is bona fide.
+//  // So keep looking till we get to the end of the string.
+//  while (from < dem_len) {
+//    size_t position = demangled_ref.find(path, from);
+//    // The path isn't in the demangled string, return false:
+//    if (position == llvm::StringRef::npos) {
+//      return false;
+//    }
+//    
+//    // We don't want our match to end up inside any identifiers.  So we require
+//    // that the match string have no alphanumeric characters directly before or
+//    // after it.
+//    if (position == 0 || !std::isalnum(demangled_ref[position-1])) {
+//      size_t end_loc = position + path_len;
+//      if (end_loc == dem_len || !std::isalnum(demangled_ref[end_loc]))
+//        return true;
+//    }
+//    // This candidate didn't work, bump from past it and see if there's another.
+//    from = position + path_len;
+//  }
+//  return false;                                         
+}
+
 bool CPlusPlusLanguage::ExtractContextAndIdentifier(
     const char *name, llvm::StringRef &context, llvm::StringRef &identifier) {
   if (MSVCUndecoratedNameParser::IsMSVCUndecoratedName(name))
Index: lldb/source/Core/Module.cpp
===================================================================
--- lldb/source/Core/Module.cpp
+++ lldb/source/Core/Module.cpp
@@ -739,13 +739,27 @@
     while (i < sc_list.GetSize()) {
       if (!sc_list.GetContextAtIndex(i, sc))
         break;
-      ConstString full_name(sc.GetFunctionName());
-      if (full_name &&
-          ::strstr(full_name.GetCString(), m_name.GetCString()) == nullptr) {
-        sc_list.RemoveContextAtIndex(i);
-      } else {
-        ++i;
+      
+      llvm::StringRef user_name = m_name.GetStringRef();
+      bool keep_it = true;
+      Language *language = Language::FindPlugin(sc.GetLanguage());
+      // If the symbol has a language, then let the language make the match.
+      // Otherwise just check that the demangled name contains the user name.
+      if (language)
+        keep_it = language->DemangledNameContainsPath(m_name.GetStringRef(),
+                sc.GetFunctionName());
+      else {
+        llvm::StringRef full_name = sc.GetFunctionName().GetStringRef();
+        if (full_name.empty() && sc.symbol)
+            full_name = sc.symbol->GetName().GetStringRef();
+        // We always keep unnamed symbols:
+        if (!full_name.empty())
+          keep_it = full_name.contains(user_name);
       }
+      if (keep_it)
+        ++i;
+      else
+        sc_list.RemoveContextAtIndex(i);
     }
   }
 
Index: lldb/include/lldb/Target/Language.h
===================================================================
--- lldb/include/lldb/Target/Language.h
+++ lldb/include/lldb/Target/Language.h
@@ -217,6 +217,17 @@
                                         std::string &prefix,
                                         std::string &suffix);
 
+  // When looking up functions, we take a user provided string which may be a
+  // partial match to the full demangled name and compare it to the actual
+  // demangled name to see if it matches as much as the user specified.  An
+  // example of this is if the user provided A::my_function, but the
+  // symbol was really B::A::my_function.  We want that to be
+  // a match.  But we wouldn't want this to match AnotherB::A::my_function.  The
+  // user is specifying a truncated path, not a truncated set of characters.
+  // This function does a language-aware comparison for those purposes.
+  virtual bool DemangledNameContainsPath(llvm::StringRef path, 
+                                         ConstString demangled) const;
+
   // if a language has a custom format for printing variable declarations that
   // it wants LLDB to honor it should return an appropriate closure here
   virtual DumpValueObjectOptions::DeclPrintingHelper GetDeclPrintingHelper();
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to