Author: Med Ismail Bennani Date: 2023-08-11T23:59:42-07:00 New Revision: 8f75c4d01eff3c65d7ae40bfd05582de7dffa590
URL: https://github.com/llvm/llvm-project/commit/8f75c4d01eff3c65d7ae40bfd05582de7dffa590 DIFF: https://github.com/llvm/llvm-project/commit/8f75c4d01eff3c65d7ae40bfd05582de7dffa590.diff LOG: [lldb/crashlog] Make TextCrashLogParser more resilient to new lines This patch changes the parsing logic for the legacy crash report format to avoid interrupting the parsing if there are new lines in the middle of a section. To do so, the parser starts to skip all consecutive empty lines. If the number of lines skipped is greater than 1, the parser considers that it reached a new section of the report and should reset the parsing mode back to normal. Otherwise, it tries to parse the next line in the current parsing mode. If it succeeds, the parser will also skip that line since it has already been parsed and continue the parsing. rdar://107022595 Differential Revision: https://reviews.llvm.org/D157043 Signed-off-by: Med Ismail Bennani <ism...@bennani.ma> Added: Modified: lldb/examples/python/crashlog.py Removed: ################################################################################ diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index ccf3fb1aa9146a..7f1a43b435689f 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -537,21 +537,21 @@ class InteractiveCrashLogException(Exception): class CrashLogParser: @staticmethod - def create(debugger, path, verbose): + def create(debugger, path, options): data = JSONCrashLogParser.is_valid_json(path) if data: - parser = JSONCrashLogParser(debugger, path, verbose) + parser = JSONCrashLogParser(debugger, path, options) parser.data = data return parser else: - return TextCrashLogParser(debugger, path, verbose) + return TextCrashLogParser(debugger, path, options) - def __init__(self, debugger, path, verbose): + def __init__(self, debugger, path, options): self.path = os.path.expanduser(path) - self.verbose = verbose + self.options = options # List of DarwinImages 
sorted by their index. self.images = list() - self.crashlog = CrashLog(debugger, self.path, self.verbose) + self.crashlog = CrashLog(debugger, self.path, self.options.verbose) @abc.abstractmethod def parse(self): @@ -577,8 +577,8 @@ def parse_json(buffer): except: return None - def __init__(self, debugger, path, verbose): - super().__init__(debugger, path, verbose) + def __init__(self, debugger, path, options): + super().__init__(debugger, path, options) def parse(self): try: @@ -639,7 +639,7 @@ def parse_images(self, json_images): path = json_image["path"] if "path" in json_image else "" version = "" darwin_image = self.crashlog.DarwinImage( - low, high, name, version, img_uuid, path, self.verbose + low, high, name, version, img_uuid, path, self.options.verbose ) if "arch" in json_image: darwin_image.arch = json_image["arch"] @@ -898,8 +898,8 @@ def get(cls): ) exception_extra_regex = re.compile(r"^Exception\s+.*:\s+(.*)") - def __init__(self, debugger, path, verbose): - super().__init__(debugger, path, verbose) + def __init__(self, debugger, path, options): + super().__init__(debugger, path, options) self.thread = None self.app_specific_backtrace = False self.parse_mode = CrashLogParseMode.NORMAL @@ -917,8 +917,15 @@ def parse(self): with open(self.path, "r", encoding="utf-8") as f: lines = f.read().splitlines() - for line in lines: + idx = 0 + lines_count = len(lines) + while True: + if idx >= lines_count: + break + + line = lines[idx] line_len = len(line) + if line_len == 0: if self.thread: if self.parse_mode == CrashLogParseMode.THREAD: @@ -935,22 +942,36 @@ def parse(self): else: self.crashlog.threads.append(self.thread) self.thread = None - else: - # only append an extra empty line if the previous line - # in the info_lines wasn't empty - if len(self.crashlog.info_lines) > 0 and len( - self.crashlog.info_lines[-1] - ): - self.crashlog.info_lines.append(line) + + empty_lines = 1 + while ( + idx + empty_lines < lines_count + and len(lines[idx + empty_lines]) 
== 0 + ): + empty_lines = empty_lines + 1 + + if ( + empty_lines == 1 + and idx + empty_lines < lines_count - 1 + and self.parse_mode != CrashLogParseMode.NORMAL + ): + # check if next line can be parsed with the current parse mode + next_line_idx = idx + empty_lines + if self.parsers[self.parse_mode](lines[next_line_idx]): + # If that suceeded, skip the empty line and the next line. + idx = next_line_idx + 1 + continue self.parse_mode = CrashLogParseMode.NORMAL - else: - self.parsers[self.parse_mode](line) + + self.parsers[self.parse_mode](line) + + idx = idx + 1 return self.crashlog def parse_exception(self, line): if not line.startswith("Exception"): - return + return False if line.startswith("Exception Type:"): self.crashlog.thread_exception = line[15:].strip() exception_type_match = self.exception_type_regex.search(line) @@ -968,7 +989,7 @@ def parse_exception(self, line): elif line.startswith("Exception Codes:"): self.crashlog.thread_exception_data = line[16:].strip() if "type" not in self.crashlog.exception: - return + return False exception_codes_match = self.exception_codes_regex.search(line) if exception_codes_match: self.crashlog.exception["codes"] = self.crashlog.thread_exception_data @@ -979,10 +1000,11 @@ def parse_exception(self, line): ] else: if "type" not in self.crashlog.exception: - return + return False exception_extra_match = self.exception_extra_regex.search(line) if exception_extra_match: self.crashlog.exception["message"] = exception_extra_match.group(1) + return True def parse_normal(self, line): if line.startswith("Process:"): @@ -1081,14 +1103,14 @@ def parse_normal(self, line): def parse_thread(self, line): if line.startswith("Thread"): - return + return False if self.null_frame_regex.search(line): print('warning: thread parser ignored null-frame: "%s"' % line) - return + return False frame_match = self.frame_regex.search(line) if not frame_match: print('error: frame regex failed for line: "%s"' % line) - return + return False frame_id 
= ( frame_img_name @@ -1155,6 +1177,8 @@ def parse_thread(self, line): self.crashlog.Frame(int(frame_id), int(frame_addr, 0), description) ) + return True + def parse_images(self, line): image_match = self.image_regex_uuid.search(line) if image_match: @@ -1174,7 +1198,7 @@ def parse_images(self, line): img_version.strip() if img_version else "", uuid.UUID(img_uuid), img_path, - self.verbose, + self.options.verbose, ) unqualified_img_name = os.path.basename(img_path) if unqualified_img_name in self.symbols: @@ -1188,17 +1212,22 @@ def parse_images(self, line): self.images.append(image) self.crashlog.images.append(image) + return True else: - print("error: image regex failed for: %s" % line) + if self.options.debug: + print("error: image regex failed for: %s" % line) + return False def parse_thread_registers(self, line): # "r12: 0x00007fff6b5939c8 r13: 0x0000000007000006 r14: 0x0000000000002a03 r15: 0x0000000000000c00" reg_values = re.findall("([a-z0-9]+): (0x[0-9a-f]+)", line, re.I) for reg, value in reg_values: self.thread.registers[reg] = int(value, 16) + return len(reg_values) != 0 def parse_system(self, line): self.crashlog.system_profile.append(line) + return True def parse_instructions(self, line): pass @@ -1412,7 +1441,7 @@ def add_module(image, target, obj_dir): def load_crashlog_in_scripted_process(debugger, crashlog_path, options, result): - crashlog = CrashLogParser.create(debugger, crashlog_path, False).parse() + crashlog = CrashLogParser.create(debugger, crashlog_path, options).parse() target = lldb.SBTarget() # 1. 
Try to use the user-provided target @@ -1735,7 +1764,7 @@ def should_run_in_interactive_mode(options, ci): result.SetError(str(e)) else: crash_log = CrashLogParser.create( - debugger, crashlog_path, options.verbose + debugger, crashlog_path, options ).parse() SymbolicateCrashLog(crash_log, options) _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits