commit:     a682f25f51910a38b6b28733265ee3a6ce31274b
Author:     Magnus Granberg <zorry <AT> gentoo <DOT> org>
AuthorDate: Tue Nov 16 03:17:45 2021 +0000
Commit:     Magnus Granberg <zorry <AT> gentoo <DOT> org>
CommitDate: Tue Nov 16 03:17:45 2021 +0000
URL:        
https://gitweb.gentoo.org/proj/tinderbox-cluster.git/commit/?id=a682f25f

Add new log parser

Signed-off-by: Magnus Granberg <zorry <AT> gentoo.org>

 bin/ci_log_parser                          |  20 ++++
 buildbot_gentoo_ci/config/buildfactorys.py |   3 +-
 buildbot_gentoo_ci/db/builds.py            |  11 +-
 buildbot_gentoo_ci/db/model.py             |   2 +-
 buildbot_gentoo_ci/logs/log_parser.py      | 174 +++++++++++++++++++++++++++++
 buildbot_gentoo_ci/steps/builders.py       |  27 +++--
 buildbot_gentoo_ci/steps/logs.py           |  79 +++++++++++--
 logparser.json                             |   1 +
 8 files changed, 288 insertions(+), 29 deletions(-)

diff --git a/bin/ci_log_parser b/bin/ci_log_parser
new file mode 100644
index 0000000..6401a49
--- /dev/null
+++ b/bin/ci_log_parser
@@ -0,0 +1,20 @@
+#!/usr/bin/python
+#
+# Copyright 2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import argparse
+import sys
+from buildbot_gentoo_ci.logs.log_parser import runLogParser
+
def main():
    """Command-line entry point for the standalone build-log parser.

    Required arguments:
      -f/--file  path to the build log to parse (gzip-compressed)
      -u/--uuid  uuid of the project whose patterns should be applied
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-f", "--file", required=True)
    arg_parser.add_argument("-u", "--uuid", required=True)
    runLogParser(arg_parser.parse_args())
    sys.exit()

if __name__ == "__main__":
    main()

diff --git a/buildbot_gentoo_ci/config/buildfactorys.py 
b/buildbot_gentoo_ci/config/buildfactorys.py
index d5f881b..27f0076 100644
--- a/buildbot_gentoo_ci/config/buildfactorys.py
+++ b/buildbot_gentoo_ci/config/buildfactorys.py
@@ -161,7 +161,8 @@ def parse_build_log():
     # set needed Propertys
     f.addStep(logs.SetupPropertys())
     # pers the build log for info qa errors
-    f.addStep(logs.ParserBuildLog())
+    f.addStep(logs.SetupParserBuildLoger())
+    #f.addStep(logs.ParserBuildLog())
     # pers the log from pkg check
     #f.addStep(logs.ParserPkgCheckLog())
     # Upload the log to the cloud and remove the log

diff --git a/buildbot_gentoo_ci/db/builds.py b/buildbot_gentoo_ci/db/builds.py
index 2cd52bc..6639ed5 100644
--- a/buildbot_gentoo_ci/db/builds.py
+++ b/buildbot_gentoo_ci/db/builds.py
@@ -43,6 +43,7 @@ class BuildsConnectorComponent(base.DBConnectorComponent):
                                          status=project_build_data['status'],
                                          
requested=project_build_data['requested'],
                                          created_at=created_at,
+                                         buildbot_build_id=0,
                                          build_id=new_number))
             except (sa.exc.IntegrityError, sa.exc.ProgrammingError):
                 id = None
@@ -53,27 +54,25 @@ class BuildsConnectorComponent(base.DBConnectorComponent):
         return self.db.pool.do(thd)
 
     @defer.inlineCallbacks
-    def setSatusBuilds(self, build_id, project_uuid, status):
+    def setSatusBuilds(self, id, status):
         updated_at = int(self.master.reactor.seconds())
         def thd(conn, no_recurse=False):
         
                 tbl = self.db.model.projects_builds
                 q = tbl.update()
-                q = q.where(tbl.c.build_id == build_id)
-                q = q.where(tbl.c.project_uuid == project_uuid)
+                q = q.where(tbl.c.id == id)
                 conn.execute(q, updated_at=updated_at,
                                 status=status)
         yield self.db.pool.do(thd)
 
     @defer.inlineCallbacks
-    def setBuildbotBuildIdBuilds(self, build_id, project_uuid, 
buildbot_build_id):
+    def setBuildbotBuildIdBuilds(self, id, buildbot_build_id):
         updated_at = int(self.master.reactor.seconds())
         def thd(conn, no_recurse=False):
         
                 tbl = self.db.model.projects_builds
                 q = tbl.update()
-                q = q.where(tbl.c.build_id == build_id)
-                q = q.where(tbl.c.project_uuid == project_uuid)
+                q = q.where(tbl.c.id == id)
                 conn.execute(q, updated_at=updated_at,
                                 buildbot_build_id=buildbot_build_id)
         yield self.db.pool.do(thd)

diff --git a/buildbot_gentoo_ci/db/model.py b/buildbot_gentoo_ci/db/model.py
index b4efcf2..75ba873 100644
--- a/buildbot_gentoo_ci/db/model.py
+++ b/buildbot_gentoo_ci/db/model.py
@@ -230,7 +230,7 @@ class Model(base.DBConnectorComponent):
         sa.Column('start', sa.Integer, default=0),
         sa.Column('end', sa.Integer, default=0),
         sa.Column('status', sa.Enum('info', 'warning', 'ignore', 'error'), 
default='info'),
-        sa.Column('type', sa.Enum('info', 'qa', 'compile', 'configure', 
'install', 'postinst', 'prepare', 'setup', 'test', 'unpack', 'ignore'), 
default='info'),
+        sa.Column('type', sa.Enum('info', 'qa', 'compile', 'configure', 
'install', 'postinst', 'prepare', 'setup', 'test', 'unpack', 'ignore', 
'issues', 'misc', 'elog'), default='info'),
         sa.Column('search_type', sa.Enum('in', 'startswith', 'endswith', 
'search'), default='in'),
     )
 

diff --git a/buildbot_gentoo_ci/logs/log_parser.py 
b/buildbot_gentoo_ci/logs/log_parser.py
new file mode 100644
index 0000000..550573d
--- /dev/null
+++ b/buildbot_gentoo_ci/logs/log_parser.py
@@ -0,0 +1,174 @@
+# Copyright 2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import sys
+from multiprocessing import Pool, cpu_count
+import re
+import io
+import gzip
+import json
+import os
+from sqlalchemy.ext.declarative import declarative_base
+import sqlalchemy as sa
+
+Base = declarative_base()
+
class ProjectsPattern(Base):
    """Declarative mapping of the projects_pattern table.

    Redeclared here (it also exists in buildbot_gentoo_ci/db/model.py) so
    the standalone log parser can query patterns with plain SQLAlchemy,
    without importing the buildbot master's DB layer.
    """
    __tablename__ = "projects_pattern"
    id = sa.Column(sa.Integer, primary_key=True)
    # uuid of the project this pattern belongs to
    project_uuid = sa.Column(sa.String(36), nullable=False)
    # pattern text; how it is applied depends on search_type
    search = sa.Column(sa.String(50), nullable=False)
    # number of extra context lines to report before a match (see search_buildlog)
    start = sa.Column(sa.Integer, default=0)
    # number of extra context lines to report after a match
    end = sa.Column(sa.Integer, default=0)
    status = sa.Column(sa.Enum('info', 'warning', 'ignore', 'error'), default='info')
    type = sa.Column(sa.Enum('info', 'qa', 'compile', 'configure', 'install', 'postinst', 'prepare', 'setup', 'test', 'unpack', 'ignore', 'issues', 'misc', 'elog'), default='info')
    # how 'search' is matched: substring ('in'), prefix, suffix, or regex ('search')
    search_type = sa.Column(sa.Enum('in', 'startswith', 'endswith', 'search'), default='in')
+
def getDBSession(config):
    """Open and return a SQLAlchemy session for config['database'].

    FIXME: Read the user/pass from file
    """
    db_engine = sa.create_engine(config['database'])
    session_factory = sa.orm.sessionmaker(bind=db_engine)
    return session_factory()
+
def getMultiprocessingPool(config):
    """Return a multiprocessing Pool sized from config['core'].

    NOTE: config['core'] is expected to be set to cores/4.

    Usage:
        multi_pool = getMultiprocessingPool(config)
        multi_pool.apply_async(function, (args...))
        multi_pool.close()
        multi_pool.join()
    """
    worker_count = int(config['core'])
    return Pool(processes=worker_count)
+
def addPatternToList(Session, pattern_list, uuid):
    """Append project *uuid*'s valid search patterns to pattern_list.

    Each pattern row is converted to a plain dict (so it can be pickled
    for the multiprocessing workers).  Rows whose 'search' field is not a
    compilable regex are reported on stdout and skipped.  Returns the
    extended pattern_list.
    """
    fields = ('id', 'project_uuid', 'search', 'start', 'end',
              'status', 'type', 'search_type')
    for row in Session.query(ProjectsPattern).filter_by(project_uuid=uuid).all():
        # drop patterns that are not valid regexes
        try:
            re.compile(row.search)
        except re.error:
            print("Non valid regex pattern")
            print(row)
        else:
            pattern_list.append({name: getattr(row, name) for name in fields})
    return pattern_list
+
def get_log_search_pattern(Session, uuid, default_uuid):
    """Collect the log search patterns for one project.

    The project's own patterns come first, followed by the patterns of
    the default project (default_uuid).
    """
    patterns = addPatternToList(Session, [], uuid)
    return addPatternToList(Session, patterns, default_uuid)
+
def search_buildlog(log_search_pattern_list, logfile_text_dict, tmp_index, max_text_lines):
    """Match one log line against every search pattern.

    logfile_text_dict maps line numbers to text, tmp_index is the line to
    examine, and max_text_lines is the highest line number available.
    Returns {line_number: {'text', 'type', 'status', ...}} covering the
    matched line (with its pattern's type/status/id) plus any requested
    context lines, or None when nothing was recorded.
    """
    text_line = logfile_text_dict[tmp_index]
    summary = {}

    def add_info_line(line_number):
        # record a plain context/info line; never overwrite an existing entry
        if line_number not in summary:
            summary[line_number] = {
                'text': logfile_text_dict[line_number],
                'type': 'info',
                'status': 'info',
            }

    for pattern in log_search_pattern_list:
        needle = pattern['search']
        search_type = pattern['search_type']
        if search_type == 'in':
            hit = needle in text_line
        elif search_type == 'startswith':
            hit = text_line.startswith(needle)
        elif search_type == 'endswith':
            hit = text_line.endswith(needle)
        elif search_type == 'search':
            hit = bool(re.search(needle, text_line))
        else:
            hit = False
        if hit:
            # a later matching pattern overwrites an earlier one's entry
            summary[tmp_index] = {
                'text': text_line,
                'type': pattern['type'],
                'status': pattern['status'],
                'search_pattern_id': pattern['id'],
            }
            # context lines above the match; only honored up to 9 lines back
            if pattern['start'] != 0:
                first = tmp_index - pattern['start']
                if first >= tmp_index - 9:
                    for line_number in range(first, tmp_index):
                        add_info_line(line_number)
            # context lines below the match, clipped to the lines read so far
            if pattern['end'] != 0:
                last = min(tmp_index + pattern['end'], max_text_lines)
                for line_number in range(tmp_index + 1, last + 1):
                    add_info_line(line_number)
        elif text_line.startswith(' * ') or (text_line.startswith('>>>') and not text_line.startswith('>>> /')):
            # unmatched lines that start with ' * ' or '>>>' (but not
            # '>>> /') are kept as plain info lines
            add_info_line(tmp_index)
    return summary or None
+
def getConfigSettings():
    """Load logparser.json from the directory above 'workers/'.

    Assumes the parser runs with a 'workers/<worker>/<build>' directory as
    cwd (the workdir set by SetupParserBuildLoger); everything before
    'workers/' is taken as the basedir holding logparser.json.
    """
    basedir = os.getcwd().split('workers/')[0]
    with open(basedir + 'logparser.json') as config_file:
        return json.load(config_file)
+
def runLogParser(args):
    """Parse a gzipped build log and print each match as one json line.

    args.file is the log to parse, args.uuid the project whose patterns
    are used (the default project's patterns are always added).  Every
    line that hits a pattern is printed as a json dict on stdout so the
    calling buildbot step (PersOutputOfLogParser) can collect them.
    """
    config = getConfigSettings()
    Session = getDBSession(config)
    mp_pool = getMultiprocessingPool(config)
    #NOTE: The patterns are from https://github.com/toralf/tinderbox/tree/master/data files.
    # They are stored in a db instead of files.
    log_search_pattern_list = get_log_search_pattern(Session, args.uuid, config['default_uuid'])
    logfile_text_dict = {}
    async_results = []
    index = 1
    max_text_lines = 0
    with io.TextIOWrapper(io.BufferedReader(gzip.open(args.file, 'rb'))) as f:
        for text_line in f:
            logfile_text_dict[index] = text_line.strip('\n')
            # Submit the line to the pool.  apply_async pickles a snapshot
            # of its arguments, so trimming logfile_text_dict below does not
            # affect already-submitted work.  The results are fetched after
            # the loop: calling res.get() right here would block on every
            # line and serialize the whole pool.
            async_results.append(mp_pool.apply_async(
                search_buildlog,
                (log_search_pattern_list, logfile_text_dict, index, max_text_lines,)))
            # keep only the last 20 lines as upper-context for the patterns
            if index >= 20:
                del logfile_text_dict[index - 19]
            index = index + 1
            # NOTE(review): max_text_lines never exceeds the line being
            # parsed, so lower ('end') context lines are never emitted in
            # this streaming setup — TODO: buffer ahead if wanted.
            max_text_lines = index
    mp_pool.close()
    mp_pool.join()
    # print in submission order, i.e. the same order as the log lines
    for res in async_results:
        summary = res.get()
        if summary is not None:
            print(json.dumps(summary))
    Session.close()

diff --git a/buildbot_gentoo_ci/steps/builders.py 
b/buildbot_gentoo_ci/steps/builders.py
index 4c9444a..7247abd 100644
--- a/buildbot_gentoo_ci/steps/builders.py
+++ b/buildbot_gentoo_ci/steps/builders.py
@@ -295,16 +295,15 @@ class SetupPropertys(BuildStep):
         self.setProperty('pkg_check_log_data', None, 'pkg_check_log_data')
         self.setProperty('faild_version_data', None, 'faild_version_data')
         self.setProperty('rerun', 0, 'rerun')
-        print(self.getProperty("buildnumber"))
         project_build_data = self.getProperty('project_build_data')
+        project_build_data['status'] = 'in-progress'
+        project_build_data['buildbot_build_id'] = 
self.getProperty("buildnumber")
         yield self.gentooci.db.builds.setSatusBuilds(
-                                                    
project_build_data['build_id'],
-                                                    
project_build_data['project_uuid'],
-                                                    'in-progress')
+                                                    project_build_data['id'],
+                                                    
project_build_data['status'])
         yield self.gentooci.db.builds.setBuildbotBuildIdBuilds(
-                                                    
project_build_data['build_id'],
-                                                    
project_build_data['project_uuid'],
-                                                    
self.getProperty("buildnumber"))
+                                                    project_build_data['id'],
+                                                    
project_build_data['buildbot_build_id'])
         self.setProperty('project_build_data', project_build_data, 
'project_build_data')
         print(self.getProperty("project_build_data"))
         return SUCCESS
@@ -583,9 +582,17 @@ class CheckEmergeLogs(BuildStep):
         version_data = yield 
self.gentooci.db.versions.getVersionByName(version, package_data['uuid'])
         return version_data
 
+    @defer.inlineCallbacks
+    def createDistDir(self):
+        workdir = yield os.path.join(self.master.basedir, 'workers', 
self.getProperty('workername'))
+        self.aftersteps_list.append(steps.MasterShellCommand(
+            command=['mkdir', str(self.getProperty("buildnumber"))],
+            workdir=workdir
+        ))
+
     @defer.inlineCallbacks
     def getLogFile(self, cpv, log_dict):
-        masterdest = yield os.path.join(self.master.basedir, 'cpv_logs', 
log_dict[cpv]['full_logname'])
+        masterdest = yield os.path.join(self.master.basedir, 'workers', 
self.getProperty('workername'), str(self.getProperty("buildnumber")) 
,log_dict[cpv]['full_logname'])
         self.aftersteps_list.append(steps.FileUpload(
             workersrc=log_dict[cpv]['log_path'],
             masterdest=masterdest
@@ -753,6 +760,7 @@ class CheckEmergeLogs(BuildStep):
                 if cpv in log_dict or faild_cpv in log_dict:
                     if cpv in log_dict:
                         self.log_data[cpv] = log_dict[cpv]
+                        yield self.createDistDir()
                         yield self.getLogFile(cpv, log_dict)
                         faild_version_data = False
                     if faild_cpv:
@@ -777,7 +785,8 @@ class CheckEmergeLogs(BuildStep):
                             'repository_data' : 
self.getProperty('repository_data'),
                             'faild_cpv' : faild_cpv,
                             'step' : self.step,
-                            'emerge_info' : 
self.getProperty('emerge_info_output')['emerge_info']
+                            'emerge_info' : 
self.getProperty('emerge_info_output')['emerge_info'],
+                            'build_workername' : self.getProperty('workername')
                         }
                     ))
         if not self.step is None and self.aftersteps_list != []:

diff --git a/buildbot_gentoo_ci/steps/logs.py b/buildbot_gentoo_ci/steps/logs.py
index 08ec6ce..e6e8495 100644
--- a/buildbot_gentoo_ci/steps/logs.py
+++ b/buildbot_gentoo_ci/steps/logs.py
@@ -6,6 +6,7 @@ import re
 import gzip
 import io
 import hashlib
+import json
 
 from portage.versions import catpkgsplit
 
@@ -20,6 +21,22 @@ from buildbot.process.results import SKIPPED
 from buildbot.plugins import steps
 
 from buildbot_gentoo_ci.steps import minio
+from buildbot_gentoo_ci.steps import master as master_steps
+
def PersOutputOfLogParser(rc, stdout, stderr):
    """extract_fn for the ci_log_parser master command.

    Collects every stdout line that looks like a json dict (starts with
    '{') plus the command's return code into the 'build_summery_output'
    property.
    #FIXME: check if line start with {[1-9]: {
    #FIXME: Handling of stderr output
    """
    parsed_lines = [
        json.loads(line)
        for line in stdout.split('\n')
        if line.startswith('{')
    ]
    return {
        'build_summery_output': {
            'rc': rc,
            'build_summery_output_json': parsed_lines,
        }
    }
 
 class SetupPropertys(BuildStep):
     
@@ -46,6 +63,44 @@ class SetupPropertys(BuildStep):
         self.setProperty("status", 'completed', 'status')
         return SUCCESS
 
class SetupParserBuildLoger(BuildStep):
    """Queue a master-side command step that runs bin/ci_log_parser.

    The parser's stdout (one json dict per matched log line) is turned
    into the 'build_summery_output' property via PersOutputOfLogParser,
    which MakeIssue later consumes.
    """

    name = 'SetupParserBuildLoger'
    description = 'Running'
    descriptionDone = 'Ran'
    descriptionSuffix = None
    haltOnFailure = True
    flunkOnFailure = True

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @defer.inlineCallbacks
    def run(self):
        # run in workers/<build worker>/<buildbot build id>/ — the directory
        # the build log was uploaded to (see CheckEmergeLogs.getLogFile)
        workdir = yield os.path.join(self.master.basedir, 'workers', self.getProperty('build_workername'), str(self.getProperty("project_build_data")['buildbot_build_id']))
        # parse the failed cpv's log when there is one, else the built cpv's
        if self.getProperty('faild_cpv'):
            log_cpv = self.getProperty('log_build_data')[self.getProperty('faild_cpv')]
        else:
            log_cpv = self.getProperty('log_build_data')[self.getProperty('cpv')]
        # ci_log_parser -f <logfile> -u <project uuid>
        command = []
        command.append('ci_log_parser')
        command.append('-f')
        command.append(log_cpv['full_logname'])
        command.append('-u')
        command.append(self.getProperty('project_data')['uuid'])
        self.aftersteps_list = []
        self.aftersteps_list.append(master_steps.MasterSetPropertyFromCommand(
                                                            name = 'RunBuildLogParser',
                                                            haltOnFailure = True,
                                                            flunkOnFailure = True,
                                                            command=command,
                                                            workdir=workdir,
                                                            strip=False,
                                                            extract_fn=PersOutputOfLogParser
                                                            ))
        yield self.build.addStepsAfterCurrentStep(self.aftersteps_list)
        return SUCCESS
+
 class ParserBuildLog(BuildStep):
 
     name = 'ParserBuildLog'
@@ -176,7 +231,7 @@ class ParserBuildLog(BuildStep):
             log_cpv = 
self.getProperty('log_build_data')[self.getProperty('faild_cpv')]
         else:
             log_cpv = 
self.getProperty('log_build_data')[self.getProperty('cpv')]
-        file_path = yield os.path.join(self.master.basedir, 'cpv_logs', 
log_cpv['full_logname'])
+        file_path = yield os.path.join(self.master.basedir, 'workers', 
self.getProperty('build_workername'), 
str(self.getProperty("project_build_data")['buildbot_build_id']) 
,log_cpv['full_logname'])
         #FIXME: decode it to utf-8
         with io.TextIOWrapper(io.BufferedReader(gzip.open(file_path, 'rb'))) 
as f:
             for text_line in f:
@@ -214,18 +269,19 @@ class MakeIssue(BuildStep):
     #@defer.inlineCallbacks
     def run(self):
         self.gentooci = 
self.master.namedServices['services'].namedServices['gentooci']
-        summary_log_dict = self.getProperty('summary_log_dict')
+        summary_log_dict_list = 
self.getProperty('build_summery_output')['build_summery_output_json']
         error = False
         warning = False
         self.summary_log_list = []
         log_hash = hashlib.sha256()
-        for k, v in sorted(summary_log_dict.items()):
-            if v['status'] == 'error':
-                error = True
-            if v['status'] == 'warning':
-                warning = True
-            self.summary_log_list.append(v['text'])
-            log_hash.update(v['text'].encode('utf-8'))
+        for summary_log_dict in summary_log_dict_list:
+            for k, v in sorted(summary_log_dict.items()):
+                if v['status'] == 'error':
+                    error = True
+                if v['status'] == 'warning':
+                    warning = True
+                self.summary_log_list.append(v['text'])
+                log_hash.update(v['text'].encode('utf-8'))
         # add build log
         # add issue/bug/pr report
         self.setProperty("summary_log_list", self.summary_log_list, 
'summary_log_list')
@@ -300,7 +356,7 @@ class Upload(BuildStep):
         else:
             log_cpv = 
self.getProperty('log_build_data')[self.getProperty('cpv')]
         bucket = self.getProperty('project_data')['uuid'] + '-' + 'logs'
-        file_path = yield os.path.join(self.master.basedir, 'cpv_logs', 
log_cpv['full_logname'])
+        file_path = yield os.path.join(self.master.basedir, 'workers', 
self.getProperty('build_workername'), 
str(self.getProperty("project_build_data")['buildbot_build_id']) 
,log_cpv['full_logname'])
         aftersteps_list = []
         aftersteps_list.append(minio.putFileToMinio(file_path, 
log_cpv['full_logname'], bucket))
         yield self.build.addStepsAfterCurrentStep(aftersteps_list)
@@ -374,8 +430,7 @@ class setBuildStatus(BuildStep):
         self.gentooci = 
self.master.namedServices['services'].namedServices['gentooci']
         project_build_data = self.getProperty('project_build_data')
         yield self.gentooci.db.builds.setSatusBuilds(
-                                                    
project_build_data['build_id'],
-                                                    
project_build_data['project_uuid'],
+                                                    project_build_data['id'],
                                                     self.getProperty('status')
                                                     )
         if self.getProperty('status') == 'failed':

diff --git a/logparser.json b/logparser.json
new file mode 100644
index 0000000..9d4570d
--- /dev/null
+++ b/logparser.json
@@ -0,0 +1 @@
+{"database": "postgresql+psycopg2://user:password@host/gentoo-ci", 
"default_uuid" : "uuid", "core" : "2" }

Reply via email to