mib created this revision.
mib added reviewers: JDevlieghere, kastiglione, bulbazord.
mib added a project: LLDB.
Herald added a project: All.
mib requested review of this revision.
Herald added a subscriber: lldb-commits.

Sometimes, crash reports come with inlined symbols. These provide the
exact stacktrace from the user binary.

However, when investigating a crash, it's very likely that the images related
to the crashed thread are not available on the debugging user's system, or
that their versions don't match. This causes interactive crashlog to show
a degraded backtrace in lldb.

This patch aims to address that issue by parsing the inlined symbols
from the crash report and loading them into lldb's target.

To do so, we rely on the new SymbolFileJSON plugin. This patch also changes
a few things:

- Add `SBModuleSpec::SetUUIDFromString` with a typemap.
- Add an overload to `SBTarget::FindModule(const SBModuleSpec&)`.
- Update the thread parsing method in the JSONCrashLogParser to extract the 
symbol name and address for each stack frame.
- Update the thread and image parsing methods in the TextCrashLogParser to
refine the regular expressions. We now have new capture groups for the symbol
name and the offset.

So now, when parsing the crash report, we build a data structure
containing all the symbol information for each stackframe. Then, after
launching the scripted process for interactive mode, we write a JSON
symbol file for each module, containing only the symbols that belong to it.

Finally, we load the json symbol file into lldb, before showing the user
the process status and backtrace.

rdar://97345586

Signed-off-by: Med Ismail Bennani <medismail.benn...@gmail.com>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D146765

Files:
  lldb/bindings/python/python-typemaps.swig
  lldb/examples/python/crashlog.py
  lldb/include/lldb/API/SBModuleSpec.h
  lldb/include/lldb/API/SBTarget.h
  lldb/source/API/SBModuleSpec.cpp
  lldb/source/API/SBTarget.cpp

Index: lldb/source/API/SBTarget.cpp
===================================================================
--- lldb/source/API/SBTarget.cpp
+++ lldb/source/API/SBTarget.cpp
@@ -1555,6 +1555,19 @@
   return sb_module;
 }
 
+SBModule SBTarget::FindModule(const SBModuleSpec &sb_module_spec) {
+  LLDB_INSTRUMENT_VA(this, sb_module_spec);
+
+  SBModule sb_module;
+  TargetSP target_sp(GetSP());
+  if (target_sp && sb_module_spec.IsValid()) {
+    // The module list is thread safe, no need to lock
+    sb_module.SetSP(
+        target_sp->GetImages().FindFirstModule(*sb_module_spec.m_opaque_up));
+  }
+  return sb_module;
+}
+
 SBSymbolContextList SBTarget::FindCompileUnits(const SBFileSpec &sb_file_spec) {
   LLDB_INSTRUMENT_VA(this, sb_file_spec);
 
Index: lldb/source/API/SBModuleSpec.cpp
===================================================================
--- lldb/source/API/SBModuleSpec.cpp
+++ lldb/source/API/SBModuleSpec.cpp
@@ -132,6 +132,12 @@
   return m_opaque_up->GetUUID().GetBytes().size();
 }
 
+bool SBModuleSpec::SetUUIDFromString(const char *uuid, size_t uuid_len) {
+  LLDB_INSTRUMENT_VA(this, uuid, uuid_len)
+  m_opaque_up->GetUUID().SetFromStringRef(llvm::StringRef(uuid, uuid_len));
+  return m_opaque_up->GetUUID().IsValid();
+}
+
 bool SBModuleSpec::SetUUIDBytes(const uint8_t *uuid, size_t uuid_len) {
   LLDB_INSTRUMENT_VA(this, uuid, uuid_len)
   m_opaque_up->GetUUID() = UUID(uuid, uuid_len);
Index: lldb/include/lldb/API/SBTarget.h
===================================================================
--- lldb/include/lldb/API/SBTarget.h
+++ lldb/include/lldb/API/SBTarget.h
@@ -305,6 +305,8 @@
 
   lldb::SBModule FindModule(const lldb::SBFileSpec &file_spec);
 
+  lldb::SBModule FindModule(const lldb::SBModuleSpec &module_spec);
+
   /// Find compile units related to *this target and passed source
   /// file.
   ///
Index: lldb/include/lldb/API/SBModuleSpec.h
===================================================================
--- lldb/include/lldb/API/SBModuleSpec.h
+++ lldb/include/lldb/API/SBModuleSpec.h
@@ -75,6 +75,8 @@
 
   size_t GetUUIDLength();
 
+  bool SetUUIDFromString(const char *uuid, size_t uuid_len);
+
   bool SetUUIDBytes(const uint8_t *uuid, size_t uuid_len);
 
   bool GetDescription(lldb::SBStream &description);
Index: lldb/examples/python/crashlog.py
===================================================================
--- lldb/examples/python/crashlog.py
+++ lldb/examples/python/crashlog.py
@@ -40,6 +40,7 @@
 import string
 import subprocess
 import sys
+import tempfile
 import threading
 import time
 import uuid
@@ -431,6 +432,7 @@
         self.path = os.path.expanduser(path)
         self.verbose = verbose
         self.crashlog = CrashLog(debugger, self.path, self.verbose)
+        self.symbol_data = {}
 
     @abc.abstractmethod
     def parse(self):
@@ -535,9 +537,24 @@
             if ident not in self.crashlog.idents:
                 self.crashlog.idents.append(ident)
 
+            image = self.get_used_image(image_id)
             frame_offset = int(json_frame['imageOffset'])
-            image_addr = self.get_used_image(image_id)['base']
-            pc = image_addr + frame_offset
+            pc = image['base'] + frame_offset
+
+            if 'symbol' in json_frame and pc != 0:
+                image_uuid = image['uuid']
+                if not image_uuid in self.symbol_data:
+                    self.symbol_data[image_uuid] = {
+                        "symbols" : list(),
+                        "uuid": image_uuid,
+                        "triple": None
+                    }
+                self.symbol_data[image_uuid]["symbols"].append({
+                    "name": json_frame['symbol'],
+                    "type": "code",
+                    "size": 0,
+                    "address": pc,
+                })
             thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))
 
             # on arm64 systems, if it jump through a null function pointer,
@@ -586,8 +603,12 @@
                 print("error: can't parse application specific backtrace.")
                 return False
 
-            (frame_id, frame_img_name, frame_addr,
-                frame_ofs) = frame_match.groups()
+            if len(frame_match.groups()) == 4:
+                (frame_id, frame_img_name, frame_addr,
+                    frame_ofs) = frame_match.groups()
+            else:
+                (frame_id, frame_img_name, frame_addr,
+                    frame_ofs, frame_symbol, frame_offset) = frame_match.groups()
 
             thread.add_ident(frame_img_name)
             if frame_img_name not in self.crashlog.idents:
@@ -641,11 +662,11 @@
     thread_regex = re.compile(r'^Thread (\d+).*:')
     app_backtrace_regex = re.compile(r'^Application Specific Backtrace (\d+).*:')
     version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'
-    frame_regex = re.compile(r'^(\d+)\s+'              # id
-                             r'(.+?)\s+'               # img_name
-                             r'(?:' +version+ r'\s+)?' # img_version
-                             r'(0x[0-9a-fA-F]{4,})'    # addr (4 chars or more)
-                             r'(?: +(.*))?'            # offs
+    frame_regex = re.compile(r'^(\d+)\s+'                           # id
+                             r'(.+?)\s+'                            # img_name
+                             r'(?:' +version+ r'\s+)?'              # img_version
+                             r'(0x[0-9a-fA-F]{4,}) +'               # addr (4 chars or more)
+                             r'((.*)(?:(?: +\+ +)([0-9]+))|[^\s]+)' # symbol + offset
                             )
     null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{4,} +')
     image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'          # img_lo
@@ -828,12 +849,37 @@
             return
         frame_match = self.frame_regex.search(line)
         if frame_match:
-            (frame_id, frame_img_name, frame_addr,
-                frame_ofs) = frame_match.groups()
+            if len(frame_match.groups()) == 4:
+                # Get the image UUID from the frame image name.
+                (frame_id, frame_img_name, frame_addr,
+                    frame_ofs) = frame_match.groups()
+            else:
+                (frame_id, frame_img_name, frame_addr,
+                    frame_ofs, frame_symbol, frame_offset) = frame_match.groups()
             ident = frame_img_name
             self.thread.add_ident(ident)
             if ident not in self.crashlog.idents:
                 self.crashlog.idents.append(ident)
+
+            # Because of the Textual Crash Report layout, the thread section
+            # gets parsed before the binary images section. This means we can't
+            # get the image's UUID and triple right away.
+            # This is why we need to do a second pass when parsing the binary
+            # images section to update these fields.
+            if not frame_ofs or int(frame_addr) != 0:
+                if frame_img_name not in self.symbol_data:
+                    self.symbol_data[frame_img_name] = {
+                        "symbols" : list(),
+                        "uuid": uuid.UUID(int=0),
+                        "triple": None
+                    }
+                self.symbol_data[frame_img_name]['symbols'].append({
+                    "name": frame_ofs,
+                    "type": "code",
+                    "size": 0,
+                    "address": frame_addr,
+                })
+
             self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
                 frame_addr, 0), frame_ofs))
         else:
@@ -844,6 +890,11 @@
         if image_match:
             (img_lo, img_hi, img_name, img_version,
                 img_uuid, img_path) = image_match.groups()
+
+            # Now that we've parsed the image, we can update its symbol_data UUID
+            if img_name in self.symbol_data:
+                self.symbol_data[frame_img_name]['uuid'] = image_uuid
+
             image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
                                             img_name.strip(),
                                             img_version.strip()
@@ -1076,7 +1127,8 @@
     if not os.path.exists(crashlog_path):
         raise InteractiveCrashLogException("crashlog file %s does not exist" % crashlog_path)
 
-    crashlog = CrashLogParser.create(debugger, crashlog_path, False).parse()
+    crashlog_parser = CrashLogParser.create(debugger, crashlog_path, False)
+    crashlog = crashlog_parser.parse()
 
     target = lldb.SBTarget()
     # 1. Try to use the user-provided target
@@ -1116,6 +1168,35 @@
     if not process or error.Fail():
         raise InteractiveCrashLogException("couldn't launch Scripted Process", error)
 
+    tmp_dir = tempfile.TemporaryDirectory()
+    for image, symbol_data in crashlog_parser.symbol_data.items():
+        module_spec = lldb.SBModuleSpec()
+        if crashlog_parser.__class__ == JSONCrashLogParser:
+                u = uuid.UUID(image)
+                module_spec.SetUUIDFromString(u.hex)
+        else:
+            module_spec.SetObjectName(image)
+        module = target.FindModule(module_spec)
+        if module.IsValid():
+            if not symbol_data['triple']:
+                symbol_data["triple"] = module.GetTriple()
+        else:
+            print("Skipping loading inlined symbols for module %s" % image)
+            continue
+
+        symbol_file = os.path.join(tmp_dir.name, module.GetUUIDString() + ".json")
+        with open(symbol_file, 'w') as f:
+            f.write(json.dumps(symbol_data))
+
+        ci.HandleCommand("target symbols add -s '%s' %s" %
+                         (module.GetFileSpec().fullpath, symbol_file), result)
+        if not result.Succeeded():
+            raise InteractiveCrashLogException("couldn't import crash report \
+                                               inlined symbols for %s (%s)" %
+                                               (module.file.basename,
+                                                module.GetUUIDString()))
+
+
     if not options.skip_status:
         @contextlib.contextmanager
         def synchronous(debugger):
Index: lldb/bindings/python/python-typemaps.swig
===================================================================
--- lldb/bindings/python/python-typemaps.swig
+++ lldb/bindings/python/python-typemaps.swig
@@ -162,8 +162,10 @@
 // typemap for an outgoing buffer
 // See also SBEvent::SBEvent(uint32_t event, const char *cstr, uint32_t cstr_len).
 // Ditto for SBProcess::PutSTDIN(const char *src, size_t src_len).
+// Ditto for SBModuleSpec::SetUUIDFromString(const char *uuid, size_t uuid_len).
 %typemap(in) (const char *cstr, uint32_t cstr_len),
-             (const char *src, size_t src_len) {
+             (const char *src, size_t src_len),
+             (const char *uuid, size_t uuid_len) {
   if (PythonString::Check($input)) {
     PythonString str(PyRefType::Borrowed, $input);
     $1 = (char *)str.GetString().data();
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to