commit: a682f25f51910a38b6b28733265ee3a6ce31274b
Author: Magnus Granberg <zorry <AT> gentoo <DOT> org>
AuthorDate: Tue Nov 16 03:17:45 2021 +0000
Commit: Magnus Granberg <zorry <AT> gentoo <DOT> org>
CommitDate: Tue Nov 16 03:17:45 2021 +0000
URL:
https://gitweb.gentoo.org/proj/tinderbox-cluster.git/commit/?id=a682f25f
Add new log parser
Signed-off-by: Magnus Granberg <zorry <AT> gentoo.org>
bin/ci_log_parser | 20 ++++
buildbot_gentoo_ci/config/buildfactorys.py | 3 +-
buildbot_gentoo_ci/db/builds.py | 11 +-
buildbot_gentoo_ci/db/model.py | 2 +-
buildbot_gentoo_ci/logs/log_parser.py | 174 +++++++++++++++++++++++++++++
buildbot_gentoo_ci/steps/builders.py | 27 +++--
buildbot_gentoo_ci/steps/logs.py | 79 +++++++++++--
logparser.json | 1 +
8 files changed, 288 insertions(+), 29 deletions(-)
diff --git a/bin/ci_log_parser b/bin/ci_log_parser
new file mode 100644
index 0000000..6401a49
--- /dev/null
+++ b/bin/ci_log_parser
@@ -0,0 +1,20 @@
+#!/usr/bin/python
+#
+# Copyright 2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import argparse
+import sys
+from buildbot_gentoo_ci.logs.log_parser import runLogParser
+
+def main():
+# get filename, project_uuid, default_project_uuid
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-f", "--file", required=True)
+ parser.add_argument("-u", "--uuid", required=True)
+ args = parser.parse_args()
+ runLogParser(args)
+ sys.exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/buildbot_gentoo_ci/config/buildfactorys.py
b/buildbot_gentoo_ci/config/buildfactorys.py
index d5f881b..27f0076 100644
--- a/buildbot_gentoo_ci/config/buildfactorys.py
+++ b/buildbot_gentoo_ci/config/buildfactorys.py
@@ -161,7 +161,8 @@ def parse_build_log():
# set needed Propertys
f.addStep(logs.SetupPropertys())
# pers the build log for info qa errors
- f.addStep(logs.ParserBuildLog())
+ f.addStep(logs.SetupParserBuildLoger())
+ #f.addStep(logs.ParserBuildLog())
# pers the log from pkg check
#f.addStep(logs.ParserPkgCheckLog())
# Upload the log to the cloud and remove the log
diff --git a/buildbot_gentoo_ci/db/builds.py b/buildbot_gentoo_ci/db/builds.py
index 2cd52bc..6639ed5 100644
--- a/buildbot_gentoo_ci/db/builds.py
+++ b/buildbot_gentoo_ci/db/builds.py
@@ -43,6 +43,7 @@ class BuildsConnectorComponent(base.DBConnectorComponent):
status=project_build_data['status'],
requested=project_build_data['requested'],
created_at=created_at,
+ buildbot_build_id=0,
build_id=new_number))
except (sa.exc.IntegrityError, sa.exc.ProgrammingError):
id = None
@@ -53,27 +54,25 @@ class BuildsConnectorComponent(base.DBConnectorComponent):
return self.db.pool.do(thd)
@defer.inlineCallbacks
- def setSatusBuilds(self, build_id, project_uuid, status):
+ def setSatusBuilds(self, id, status):
updated_at = int(self.master.reactor.seconds())
def thd(conn, no_recurse=False):
tbl = self.db.model.projects_builds
q = tbl.update()
- q = q.where(tbl.c.build_id == build_id)
- q = q.where(tbl.c.project_uuid == project_uuid)
+ q = q.where(tbl.c.id == id)
conn.execute(q, updated_at=updated_at,
status=status)
yield self.db.pool.do(thd)
@defer.inlineCallbacks
- def setBuildbotBuildIdBuilds(self, build_id, project_uuid,
buildbot_build_id):
+ def setBuildbotBuildIdBuilds(self, id, buildbot_build_id):
updated_at = int(self.master.reactor.seconds())
def thd(conn, no_recurse=False):
tbl = self.db.model.projects_builds
q = tbl.update()
- q = q.where(tbl.c.build_id == build_id)
- q = q.where(tbl.c.project_uuid == project_uuid)
+ q = q.where(tbl.c.id == id)
conn.execute(q, updated_at=updated_at,
buildbot_build_id=buildbot_build_id)
yield self.db.pool.do(thd)
diff --git a/buildbot_gentoo_ci/db/model.py b/buildbot_gentoo_ci/db/model.py
index b4efcf2..75ba873 100644
--- a/buildbot_gentoo_ci/db/model.py
+++ b/buildbot_gentoo_ci/db/model.py
@@ -230,7 +230,7 @@ class Model(base.DBConnectorComponent):
sa.Column('start', sa.Integer, default=0),
sa.Column('end', sa.Integer, default=0),
sa.Column('status', sa.Enum('info', 'warning', 'ignore', 'error'),
default='info'),
- sa.Column('type', sa.Enum('info', 'qa', 'compile', 'configure',
'install', 'postinst', 'prepare', 'setup', 'test', 'unpack', 'ignore'),
default='info'),
+ sa.Column('type', sa.Enum('info', 'qa', 'compile', 'configure',
'install', 'postinst', 'prepare', 'setup', 'test', 'unpack', 'ignore',
'issues', 'misc', 'elog'), default='info'),
sa.Column('search_type', sa.Enum('in', 'startswith', 'endswith',
'search'), default='in'),
)
diff --git a/buildbot_gentoo_ci/logs/log_parser.py
b/buildbot_gentoo_ci/logs/log_parser.py
new file mode 100644
index 0000000..550573d
--- /dev/null
+++ b/buildbot_gentoo_ci/logs/log_parser.py
@@ -0,0 +1,174 @@
+# Copyright 2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import sys
+from multiprocessing import Pool, cpu_count
+import re
+import io
+import gzip
+import json
+import os
+from sqlalchemy.ext.declarative import declarative_base
+import sqlalchemy as sa
+
+Base = declarative_base()
+
+class ProjectsPattern(Base):
+ __tablename__ = "projects_pattern"
+ id = sa.Column(sa.Integer, primary_key=True)
+ project_uuid = sa.Column(sa.String(36), nullable=False)
+ search = sa.Column(sa.String(50), nullable=False)
+ start = sa.Column(sa.Integer, default=0)
+ end = sa.Column(sa.Integer, default=0)
+ status = sa.Column(sa.Enum('info', 'warning', 'ignore', 'error'),
default='info')
+ type = sa.Column(sa.Enum('info', 'qa', 'compile', 'configure', 'install',
'postinst', 'prepare', 'setup', 'test', 'unpack', 'ignore', 'issues', 'misc',
'elog'), default='info')
+ search_type = sa.Column(sa.Enum('in', 'startswith', 'endswith', 'search'),
default='in')
+
+def getDBSession(config):
+ #FIXME: Read the user/pass from file
+ engine = sa.create_engine(config['database'])
+ Session = sa.orm.sessionmaker(bind = engine)
+ return Session()
+
+def getMultiprocessingPool(config):
+    # Use the process count configured in config['core'] for multiprocessing
+ return Pool(processes = int(config['core']))
+ # multi_pool = getMultiprocessingPool()
+ # use multi_pool.apply_async(function, (args list)
+ # multi_pool.close()
+ # multi_pool.join()
+
+def addPatternToList(Session, pattern_list, uuid):
+ for project_pattern in
Session.query(ProjectsPattern).filter_by(project_uuid=uuid).all():
+        # check if the search pattern is valid
+ try:
+ re.compile(project_pattern.search)
+ except re.error:
+ print("Non valid regex pattern")
+ print(project_pattern)
+ else:
+ patten_dict = {}
+ patten_dict['id'] = project_pattern.id
+ patten_dict['project_uuid'] = project_pattern.project_uuid
+ patten_dict['search'] = project_pattern.search
+ patten_dict['start'] = project_pattern.start
+ patten_dict['end'] = project_pattern.end
+ patten_dict['status'] = project_pattern.status
+ patten_dict['type'] = project_pattern.type
+ patten_dict['search_type'] = project_pattern.search_type
+ pattern_list.append(patten_dict)
+ return pattern_list
+
+def get_log_search_pattern(Session, uuid, default_uuid):
+ # get pattern from the projects
+ # add that to log_search_pattern_list
+ log_search_pattern_list = []
+ log_search_pattern_list = addPatternToList(Session,
log_search_pattern_list, uuid)
+ log_search_pattern_list = addPatternToList(Session,
log_search_pattern_list, default_uuid)
+ return log_search_pattern_list
+
+def search_buildlog(log_search_pattern_list, logfile_text_dict, tmp_index,
max_text_lines):
+ # get text line to search
+ text_line = logfile_text_dict[tmp_index]
+ summery_dict = {}
+    # loop through the pattern list for a match
+ for search_pattern in log_search_pattern_list:
+ search_hit = False
+ if search_pattern['search_type'] == 'in':
+ if search_pattern['search'] in text_line:
+ search_hit = True
+ if search_pattern['search_type'] == 'startswith':
+ if text_line.startswith(search_pattern['search']):
+ search_hit = True
+ if search_pattern['search_type'] == 'endswith':
+ if text_line.endswith(search_pattern['search']):
+ search_hit = True
+ if search_pattern['search_type'] == 'search':
+ if re.search(search_pattern['search'], text_line):
+ search_hit = True
+        # add the line if the pattern matches
+ if search_hit:
+ summery_dict[tmp_index] = {}
+ summery_dict[tmp_index]['text'] = text_line
+ summery_dict[tmp_index]['type'] = search_pattern['type']
+ summery_dict[tmp_index]['status'] = search_pattern['status']
+ summery_dict[tmp_index]['search_pattern_id'] = search_pattern['id']
+ # add upper text lines if requested
+ # max 5
+ if search_pattern['start'] != 0:
+ i = tmp_index - search_pattern['start'] - 1
+ match = True
+ while match:
+ i = i + 1
+ if i < (tmp_index - 9) or i == tmp_index:
+ match = False
+ else:
+ if not i in summery_dict:
+ summery_dict[i] = {}
+ summery_dict[i]['text'] = logfile_text_dict[i]
+ summery_dict[i]['type'] = 'info'
+ summery_dict[i]['status'] = 'info'
+ # add lower text lines if requested
+ # max 5
+ if search_pattern['end'] != 0:
+ i = tmp_index
+ end = tmp_index + search_pattern['end']
+ match = True
+ while match:
+ i = i + 1
+ if i > max_text_lines or i > end:
+ match = False
+ else:
+ if not i in summery_dict:
+ summery_dict[i] = {}
+ summery_dict[i]['text'] = logfile_text_dict[i]
+ summery_dict[i]['type'] = 'info'
+ summery_dict[i]['status'] = 'info'
+ else:
+        # we add all lines that start with ' * ' as info
+        # we add all lines that start with '>>>' but not '>>> /' as info
+ if text_line.startswith(' * ') or (text_line.startswith('>>>') and
not text_line.startswith('>>> /')):
+ if not tmp_index in summery_dict:
+ summery_dict[tmp_index] = {}
+ summery_dict[tmp_index]['text'] = text_line
+ summery_dict[tmp_index]['type'] = 'info'
+ summery_dict[tmp_index]['status'] = 'info'
+ #FIXME: print json
+ if summery_dict == {}:
+ return None
+ return summery_dict
+
+def getConfigSettings():
+ configpath = os.getcwd().split('workers/')[0]
+ with open(configpath + 'logparser.json') as f:
+ config = json.load(f)
+ return config
+
+def runLogParser(args):
+ index = 1
+ max_text_lines = 0
+ logfile_text_dict = {}
+ config = getConfigSettings()
+ Session = getDBSession(config)
+ mp_pool = getMultiprocessingPool(config)
+    #NOTE: The pattern is from
https://github.com/toralf/tinderbox/tree/master/data files.
+ # Is stored in a db instead of files.
+ log_search_pattern_list = get_log_search_pattern(Session, args.uuid,
config['default_uuid'])
+ with io.TextIOWrapper(io.BufferedReader(gzip.open(args.file, 'rb'))) as f:
+ #FIXME: add support for multiprocessing
+ for text_line in f:
+ logfile_text_dict[index] = text_line.strip('\n')
+            # run the parse pattern on the line
+ #search_buildlog(log_search_pattern_list, logfile_text_dict,
index, max_text_lines)
+ res = mp_pool.apply_async(search_buildlog,
(log_search_pattern_list, logfile_text_dict, index, max_text_lines,))
+ if res.get() is not None:
+ print(json.dumps(res.get()))
+            # remove text lines that we no longer need
+ if index >= 20:
+ del logfile_text_dict[index - 19]
+ index = index + 1
+ max_text_lines = index
+ mp_pool.close()
+ mp_pool.join()
+ f.close()
+ Session.close()
diff --git a/buildbot_gentoo_ci/steps/builders.py
b/buildbot_gentoo_ci/steps/builders.py
index 4c9444a..7247abd 100644
--- a/buildbot_gentoo_ci/steps/builders.py
+++ b/buildbot_gentoo_ci/steps/builders.py
@@ -295,16 +295,15 @@ class SetupPropertys(BuildStep):
self.setProperty('pkg_check_log_data', None, 'pkg_check_log_data')
self.setProperty('faild_version_data', None, 'faild_version_data')
self.setProperty('rerun', 0, 'rerun')
- print(self.getProperty("buildnumber"))
project_build_data = self.getProperty('project_build_data')
+ project_build_data['status'] = 'in-progress'
+ project_build_data['buildbot_build_id'] =
self.getProperty("buildnumber")
yield self.gentooci.db.builds.setSatusBuilds(
-
project_build_data['build_id'],
-
project_build_data['project_uuid'],
- 'in-progress')
+ project_build_data['id'],
+
project_build_data['status'])
yield self.gentooci.db.builds.setBuildbotBuildIdBuilds(
-
project_build_data['build_id'],
-
project_build_data['project_uuid'],
-
self.getProperty("buildnumber"))
+ project_build_data['id'],
+
project_build_data['buildbot_build_id'])
self.setProperty('project_build_data', project_build_data,
'project_build_data')
print(self.getProperty("project_build_data"))
return SUCCESS
@@ -583,9 +582,17 @@ class CheckEmergeLogs(BuildStep):
version_data = yield
self.gentooci.db.versions.getVersionByName(version, package_data['uuid'])
return version_data
+ @defer.inlineCallbacks
+ def createDistDir(self):
+ workdir = yield os.path.join(self.master.basedir, 'workers',
self.getProperty('workername'))
+ self.aftersteps_list.append(steps.MasterShellCommand(
+ command=['mkdir', str(self.getProperty("buildnumber"))],
+ workdir=workdir
+ ))
+
@defer.inlineCallbacks
def getLogFile(self, cpv, log_dict):
- masterdest = yield os.path.join(self.master.basedir, 'cpv_logs',
log_dict[cpv]['full_logname'])
+ masterdest = yield os.path.join(self.master.basedir, 'workers',
self.getProperty('workername'), str(self.getProperty("buildnumber"))
,log_dict[cpv]['full_logname'])
self.aftersteps_list.append(steps.FileUpload(
workersrc=log_dict[cpv]['log_path'],
masterdest=masterdest
@@ -753,6 +760,7 @@ class CheckEmergeLogs(BuildStep):
if cpv in log_dict or faild_cpv in log_dict:
if cpv in log_dict:
self.log_data[cpv] = log_dict[cpv]
+ yield self.createDistDir()
yield self.getLogFile(cpv, log_dict)
faild_version_data = False
if faild_cpv:
@@ -777,7 +785,8 @@ class CheckEmergeLogs(BuildStep):
'repository_data' :
self.getProperty('repository_data'),
'faild_cpv' : faild_cpv,
'step' : self.step,
- 'emerge_info' :
self.getProperty('emerge_info_output')['emerge_info']
+ 'emerge_info' :
self.getProperty('emerge_info_output')['emerge_info'],
+ 'build_workername' : self.getProperty('workername')
}
))
if not self.step is None and self.aftersteps_list != []:
diff --git a/buildbot_gentoo_ci/steps/logs.py b/buildbot_gentoo_ci/steps/logs.py
index 08ec6ce..e6e8495 100644
--- a/buildbot_gentoo_ci/steps/logs.py
+++ b/buildbot_gentoo_ci/steps/logs.py
@@ -6,6 +6,7 @@ import re
import gzip
import io
import hashlib
+import json
from portage.versions import catpkgsplit
@@ -20,6 +21,22 @@ from buildbot.process.results import SKIPPED
from buildbot.plugins import steps
from buildbot_gentoo_ci.steps import minio
+from buildbot_gentoo_ci.steps import master as master_steps
+
+def PersOutputOfLogParser(rc, stdout, stderr):
+ build_summery_output = {}
+ build_summery_output['rc'] = rc
+ build_summery_output_json_list = []
+ # split the lines
+ for line in stdout.split('\n'):
+ #FIXME: check if line start with {[1-9]: {
+ if line.startswith('{'):
+ build_summery_output_json_list.append(json.loads(line))
+ build_summery_output['build_summery_output_json'] =
build_summery_output_json_list
+ #FIXME: Handling of stderr output
+ return {
+ 'build_summery_output' : build_summery_output
+ }
class SetupPropertys(BuildStep):
@@ -46,6 +63,44 @@ class SetupPropertys(BuildStep):
self.setProperty("status", 'completed', 'status')
return SUCCESS
+class SetupParserBuildLoger(BuildStep):
+
+ name = 'SetupParserBuildLoger'
+ description = 'Running'
+ descriptionDone = 'Ran'
+ descriptionSuffix = None
+ haltOnFailure = True
+ flunkOnFailure = True
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+ @defer.inlineCallbacks
+ def run(self):
+ workdir = yield os.path.join(self.master.basedir, 'workers',
self.getProperty('build_workername'),
str(self.getProperty("project_build_data")['buildbot_build_id']))
+ if self.getProperty('faild_cpv'):
+ log_cpv =
self.getProperty('log_build_data')[self.getProperty('faild_cpv')]
+ else:
+ log_cpv =
self.getProperty('log_build_data')[self.getProperty('cpv')]
+ command = []
+ command.append('ci_log_parser')
+ command.append('-f')
+ command.append(log_cpv['full_logname'])
+ command.append('-u')
+ command.append(self.getProperty('project_data')['uuid'])
+ self.aftersteps_list = []
+ self.aftersteps_list.append(master_steps.MasterSetPropertyFromCommand(
+ name =
'RunBuildLogParser',
+ haltOnFailure =
True,
+ flunkOnFailure =
True,
+ command=command,
+ workdir=workdir,
+ strip=False,
+
extract_fn=PersOutputOfLogParser
+ ))
+ yield self.build.addStepsAfterCurrentStep(self.aftersteps_list)
+ return SUCCESS
+
class ParserBuildLog(BuildStep):
name = 'ParserBuildLog'
@@ -176,7 +231,7 @@ class ParserBuildLog(BuildStep):
log_cpv =
self.getProperty('log_build_data')[self.getProperty('faild_cpv')]
else:
log_cpv =
self.getProperty('log_build_data')[self.getProperty('cpv')]
- file_path = yield os.path.join(self.master.basedir, 'cpv_logs',
log_cpv['full_logname'])
+ file_path = yield os.path.join(self.master.basedir, 'workers',
self.getProperty('build_workername'),
str(self.getProperty("project_build_data")['buildbot_build_id'])
,log_cpv['full_logname'])
#FIXME: decode it to utf-8
with io.TextIOWrapper(io.BufferedReader(gzip.open(file_path, 'rb')))
as f:
for text_line in f:
@@ -214,18 +269,19 @@ class MakeIssue(BuildStep):
#@defer.inlineCallbacks
def run(self):
self.gentooci =
self.master.namedServices['services'].namedServices['gentooci']
- summary_log_dict = self.getProperty('summary_log_dict')
+ summary_log_dict_list =
self.getProperty('build_summery_output')['build_summery_output_json']
error = False
warning = False
self.summary_log_list = []
log_hash = hashlib.sha256()
- for k, v in sorted(summary_log_dict.items()):
- if v['status'] == 'error':
- error = True
- if v['status'] == 'warning':
- warning = True
- self.summary_log_list.append(v['text'])
- log_hash.update(v['text'].encode('utf-8'))
+ for summary_log_dict in summary_log_dict_list:
+ for k, v in sorted(summary_log_dict.items()):
+ if v['status'] == 'error':
+ error = True
+ if v['status'] == 'warning':
+ warning = True
+ self.summary_log_list.append(v['text'])
+ log_hash.update(v['text'].encode('utf-8'))
# add build log
# add issue/bug/pr report
self.setProperty("summary_log_list", self.summary_log_list,
'summary_log_list')
@@ -300,7 +356,7 @@ class Upload(BuildStep):
else:
log_cpv =
self.getProperty('log_build_data')[self.getProperty('cpv')]
bucket = self.getProperty('project_data')['uuid'] + '-' + 'logs'
- file_path = yield os.path.join(self.master.basedir, 'cpv_logs',
log_cpv['full_logname'])
+ file_path = yield os.path.join(self.master.basedir, 'workers',
self.getProperty('build_workername'),
str(self.getProperty("project_build_data")['buildbot_build_id'])
,log_cpv['full_logname'])
aftersteps_list = []
aftersteps_list.append(minio.putFileToMinio(file_path,
log_cpv['full_logname'], bucket))
yield self.build.addStepsAfterCurrentStep(aftersteps_list)
@@ -374,8 +430,7 @@ class setBuildStatus(BuildStep):
self.gentooci =
self.master.namedServices['services'].namedServices['gentooci']
project_build_data = self.getProperty('project_build_data')
yield self.gentooci.db.builds.setSatusBuilds(
-
project_build_data['build_id'],
-
project_build_data['project_uuid'],
+ project_build_data['id'],
self.getProperty('status')
)
if self.getProperty('status') == 'failed':
diff --git a/logparser.json b/logparser.json
new file mode 100644
index 0000000..9d4570d
--- /dev/null
+++ b/logparser.json
@@ -0,0 +1 @@
+{"database": "postgresql+psycopg2://user:password@host/gentoo-ci",
"default_uuid" : "uuid", "core" : "2" }