 bin/crashreportScraper.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)
New commits:
commit 8ca321f9e52a43acf36a8f8184f81240bd946653
Author:     Xisco Fauli <xiscofa...@libreoffice.org>
AuthorDate: Thu Mar 14 22:49:19 2024 +0100
Commit:     Xisco Fauli <xiscofa...@libreoffice.org>
CommitDate: Fri Mar 15 12:58:57 2024 +0100

    crashreportScraper: fix version comparison

    And remove version column, it's not very relevant anyway

    Change-Id: I9101d5f63aec237cdcbfc6eb3759714cca7c5328
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/164849
    Tested-by: Jenkins
    Reviewed-by: Xisco Fauli <xiscofa...@libreoffice.org>

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index 78b2f6cb5eb2..6ce91bcba189 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -18,6 +18,7 @@ import os
 import math
 from datetime import datetime
 import urllib.parse
+import re
 
 def convert_str_to_date(value):
     value = value.replace('.', '')
@@ -73,7 +74,7 @@ def parse_reports_and_get_most_recent_report_from_last_page(url):
     reports = soup.find("div", {"id": "reports"}).tbody
 
     ID, currentID = "", ""
-    version, currentVersion = "", ""
+    version, currentVersion = 0, 0
     OS, currentOS = "", ""
 
     tr_list = reports.find_all("tr")
@@ -81,7 +82,7 @@ def parse_reports_and_get_most_recent_report_from_last_page(url):
         td_list = tr.find_all("td")
 
         currentID = td_list[0].a.text.strip()
-        currentVersion = td_list[2].text.strip().split(': ')[1]
+        currentVersion = int(''.join(re.findall("\d+", td_list[2].text)))
         currentOS = td_list[3].text.strip()
 
         # get most recent version
@@ -91,16 +92,13 @@ def parse_reports_and_get_most_recent_report_from_last_page(url):
             ID = currentID
             OS = currentOS
 
-        if not version:
-            version = currentVersion
-
         if not ID:
             ID = currentID
 
         if not OS:
             OS = currentOS
 
-    return count, ID, version, OS
+    return count, ID, OS
 
 def parse_details_and_get_info(url, gitRepo):
     try:
@@ -187,7 +185,7 @@ if __name__ == '__main__':
     with open(fileName, "a") as f:
         if bInsertHeader:
             line = '\t'.join(["Name", "Ratio", "Count", "First report", "Last Report",
-                "ID", "Version", "Reason", "OS", "Stack", "Code Lines", "Last 4 UNO Commands", '\n'])
+                "ID", "Reason", "OS", "Stack", "Code Lines", "Last 4 UNO Commands", '\n'])
             f.write(line)
             f.flush()
 
@@ -195,13 +193,13 @@
             if k not in crashesInFile:
                 print("Parsing " + k)
                 try:
-                    crashCount, crashID, crashVersion, crashOS = parse_reports_and_get_most_recent_report_from_last_page(
+                    crashCount, crashID, crashOS = parse_reports_and_get_most_recent_report_from_last_page(
                            "https://crashreport.libreoffice.org/stats/signature/" + urllib.parse.quote(k))
                     crashReason, crashStack, codeLine, unoCommands = parse_details_and_get_info(
                            "https://crashreport.libreoffice.org/stats/crash_details/" + crashID, args.repository)
                     ratio = round(crashCount / ((lDate[2] - lDate[1]).days + 1), 2)
                     line = '\t'.join([k, str(ratio), str(crashCount) , lDate[1].strftime('%y/%m/%d'), lDate[2].strftime('%y/%m/%d'),
-                            crashID, crashVersion, crashReason, crashOS, crashStack, codeLine, unoCommands, '\n'])
+                            crashID, crashReason, crashOS, crashStack, codeLine, unoCommands, '\n'])
                     f.write(line)
                     f.flush()
                 except (requests.exceptions.Timeout, AttributeError):
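
For reference only (not part of the patch): a minimal sketch of the digit-join comparison the new code relies on, assuming the version cell in the reports table reads something like "Version: 24.2.1.2" (the exact cell format and the helper name below are assumptions, not taken from the commit). Every digit group is concatenated and the result compared as a single integer, which is why version/currentVersion can now be tracked as ints instead of strings.

import re

def version_key(cell_text):
    # Hypothetical helper for illustration: concatenate every digit group,
    # e.g. "Version: 24.2.1.2" -> 24212, mirroring the patch's
    # int(''.join(re.findall("\d+", td_list[2].text))) expression.
    return int(''.join(re.findall(r"\d+", cell_text)))

# For version strings with the same digit-group structure, newer builds
# yield larger keys:
assert version_key("Version: 24.2.1.2") > version_key("Version: 24.2.0.1")

As the surrounding context ("# get most recent version") suggests, the scraper only uses this value to pick the most recent report on the page, so an approximate integer ordering is sufficient for that purpose.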