Bug#642343: backport of patches to fix the issue

Riccardo Magliocchetti Mon, 03 Sep 2012 10:24:26 -0700

Hello,

Attached is the backport of all fixes applied upstream to fix the issue. Also attached is each individual patch for easier review.

One patch deals with logs that are not ASCII-encoded; the other three fix some parsing of ill-formed input data. Please note that the ill-formed input data is not malicious and has not been forged or fuzzed: it was produced by the bootchart2 collector, which is the companion tool of pybootchartgui. So these parsing bugs make the collected data unusable.


This is the cumulative diffstat:

 parsing.py |   21 ++++++++++-----------
 samples.py |   10 +++++++++-
 2 files changed, 19 insertions(+), 12 deletions(-)

thanks,
riccardo

diff --git a/pybootchartgui/parsing.py b/pybootchartgui/parsing.py
index 99fa1ec..7728942 100644
--- a/pybootchartgui/parsing.py
+++ b/pybootchartgui/parsing.py
@@ -228,7 +228,7 @@ def _parse_headers(file):
             value = line.strip()
         headers[last] += value
         return headers, last
-    return reduce(parse, file.read().decode().split('\n'), (defaultdict(str),''))[0]
+    return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0]
 
 def _parse_timed_blocks(file):
     """Parses (ie., splits) a file into so-called timed-blocks. A
@@ -242,7 +242,7 @@ def _parse_timed_blocks(file):
             return (int(lines[0]), lines[1:])
         except ValueError:
             raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
-    blocks = file.read().decode().split('\n\n')
+    blocks = file.read().decode('utf-8').split('\n\n')
     return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')]
 
 def _parse_proc_ps_log(writer, file):
@@ -315,6 +315,8 @@ def _parse_taskstats_log(writer, file):
         for line in lines:
             if line is '': continue
             tokens = line.split(' ')
+            if len(tokens) != 6:
+                continue
 
             opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2]
             try:
@@ -450,8 +452,6 @@ def _parse_proc_meminfo_log(file):
     Parse file for global memory statistics.
     The format of relevant lines should be: ^key: value( unit)?
     """
-    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)
-
     mem_stats = []
     meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*')
 
@@ -460,12 +460,11 @@ def _parse_proc_meminfo_log(file):
 
         for line in lines:
             match = meminfo_re.match(line)
-            if not match:
-                raise ParseError("Invalid meminfo line \"%s\"" % match.groups(0))
-            if match.group(1) in used_values:
+            if match:
                 sample.add_value(match.group(1), int(match.group(2)))
 
-        mem_stats.append(sample)
+        if sample.valid():
+            mem_stats.append(sample)
 
     return mem_stats
 
@@ -491,7 +490,7 @@ def _parse_dmesg(writer, file):
     processMap['k-boot'] = kernel
     base_ts = False
     max_ts = 0
-    for line in file.read().decode().split('\n'):
+    for line in file.read().decode('utf-8').split('\n'):
         t = timestamp_re.match (line)
         if t is None:
 #                       print "duff timestamp " + line
@@ -579,7 +578,7 @@ def _parse_pacct(writer, file):
 def _parse_paternity_log(writer, file):
     parent_map = {}
     parent_map[0] = 0
-    for line in file.read().decode().split('\n'):
+    for line in file.read().decode('utf-8').split('\n'):
         elems = line.split(' ') # <Child> <Parent>
         if len (elems) >= 2:
 #                       print "paternity of %d is %d" % (int(elems[0]), int(elems[1]))
@@ -590,7 +589,7 @@ def _parse_paternity_log(writer, file):
 
 def _parse_cmdline_log(writer, file):
     cmdLines = {}
-    for block in file.read().decode().split('\n\n'):
+    for block in file.read().decode('utf-8').split('\n\n'):
         lines = block.split('\n')
         if len (lines) >= 3:
 #                       print "Lines '%s'" % (lines[0])
diff --git a/pybootchartgui/samples.py b/pybootchartgui/samples.py
index ce703b8..a7da2ea 100644
--- a/pybootchartgui/samples.py
+++ b/pybootchartgui/samples.py
@@ -38,12 +38,20 @@ class CPUSample:
                str(self.sys) + "\t" + str(self.io) + "\t" + str (self.swap)
 
 class MemSample:
+    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)
+
     def __init__(self, time):
         self.time = time
         self.records = {}
 
     def add_value(self, name, value):
-        self.records[name] = value
+        if name in MemSample.used_values:
+            self.records[name] = value
+
+    def valid(self):
+        keys = self.records.keys()
+        # discard incomplete samples
+        return [v for v in MemSample.used_values if v not in keys] == []
 
 class ProcessSample:
     def __init__(self, time, state, cpu_sample):

>From 62a80e0848bf3f5119a32bb82c7e141bd49d5e11 Mon Sep 17 00:00:00 2001
From: Riccardo Magliocchetti <riccardo.magliocche...@gmail.com>
Date: Fri, 17 Aug 2012 18:02:12 +0200
Subject: [PATCH] pybootchartgui: be more tolerant when parsing
 proc_meminfo.log

waasdorp reports in bug #40 that we are raising an exception if
a line in proc_meminfo.log is not well formed.
He is right because we were trying to access matches of a regexp
that did not match.

That is obviously silly but it is also silly to stop processing
all the data if a line is wrong, even a line that we are not
interested in it. Instead ignore the line and skip the sample
if it is not valid.

Example of broken line:

Shmem:

While at it remove an unused variable
---
 pybootchartgui/parsing.py |    7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pybootchartgui/parsing.py b/pybootchartgui/parsing.py
index 0c54884..845b3ae 100644
--- a/pybootchartgui/parsing.py
+++ b/pybootchartgui/parsing.py
@@ -453,8 +453,6 @@ def _parse_proc_meminfo_log(file):
     Parse file for global memory statistics.
     The format of relevant lines should be: ^key: value( unit)?
     """
-    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)
-
     mem_stats = []
     meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*')
 
@@ -463,9 +461,8 @@ def _parse_proc_meminfo_log(file):
 
         for line in lines:
             match = meminfo_re.match(line)
-            if not match:
-                raise ParseError("Invalid meminfo line \"%s\"" % match.groups(0))
-            sample.add_value(match.group(1), int(match.group(2)))
+            if match:
+                sample.add_value(match.group(1), int(match.group(2)))
 
         if sample.valid():
             mem_stats.append(sample)
-- 
1.7.10.4

>From d39566348784eb80672f0d0bd78f0a839424281c Mon Sep 17 00:00:00 2001
From: Riccardo Magliocchetti <riccardo.magliocche...@gmail.com>
Date: Sun, 17 Jun 2012 14:50:05 +0200
Subject: [PATCH] pybootchartgui: ensure a MemSample is valid before storing
 it

Fix #39
---
 pybootchartgui/parsing.py |    6 +++---
 pybootchartgui/samples.py |   10 +++++++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/pybootchartgui/parsing.py b/pybootchartgui/parsing.py
index 778149b..18eb17c 100644
--- a/pybootchartgui/parsing.py
+++ b/pybootchartgui/parsing.py
@@ -465,10 +465,10 @@ def _parse_proc_meminfo_log(file):
             match = meminfo_re.match(line)
             if not match:
                 raise ParseError("Invalid meminfo line \"%s\"" % match.groups(0))
-            if match.group(1) in used_values:
-                sample.add_value(match.group(1), int(match.group(2)))
+            sample.add_value(match.group(1), int(match.group(2)))
 
-        mem_stats.append(sample)
+        if sample.valid():
+            mem_stats.append(sample)
 
     return mem_stats
 
diff --git a/pybootchartgui/samples.py b/pybootchartgui/samples.py
index ce703b8..a7da2ea 100644
--- a/pybootchartgui/samples.py
+++ b/pybootchartgui/samples.py
@@ -38,12 +38,20 @@ class CPUSample:
                str(self.sys) + "\t" + str(self.io) + "\t" + str (self.swap)
 
 class MemSample:
+    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)
+
     def __init__(self, time):
         self.time = time
         self.records = {}
 
     def add_value(self, name, value):
-        self.records[name] = value
+        if name in MemSample.used_values:
+            self.records[name] = value
+
+    def valid(self):
+        keys = self.records.keys()
+        # discard incomplete samples
+        return [v for v in MemSample.used_values if v not in keys] == []
 
 class ProcessSample:
     def __init__(self, time, state, cpu_sample):
-- 
1.7.10.4

>From 77d7c35c9c03b21fdc8a1136e1d935965336ffa7 Mon Sep 17 00:00:00 2001
From: Riccardo Magliocchetti <riccardo.magliocche...@gmail.com>
Date: Sun, 17 Jun 2012 14:34:30 +0200
Subject: [PATCH] pybootchartgui: skip malformed taskstats lines

Partially fix #39
---
 pybootchartgui/parsing.py |    2 ++
 1 file changed, 2 insertions(+)

diff --git a/pybootchartgui/parsing.py b/pybootchartgui/parsing.py
index 0b6ff66..778149b 100644
--- a/pybootchartgui/parsing.py
+++ b/pybootchartgui/parsing.py
@@ -319,6 +319,8 @@ def _parse_taskstats_log(writer, file):
         for line in lines:
             if line is '': continue
             tokens = line.split(' ')
+            if len(tokens) != 6:
+                continue
 
             opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2]
             cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1]),
-- 
1.7.10.4

>From 9a7ff9823ae8562e11094a5a7155af067733ae95 Mon Sep 17 00:00:00 2001
From: Riccardo Magliocchetti <riccardo.magliocche...@gmail.com>
Date: Sun, 17 Jun 2012 12:53:42 +0200
Subject: [PATCH] pybootchartgui: fix parsing of non-ascii bytes in logs

It looks like the days of ascii logs is gone so decode all the
files we read as utf-8. Fix #38.
---
 pybootchartgui/parsing.py |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pybootchartgui/parsing.py b/pybootchartgui/parsing.py
index 99fa1ec..124a487 100644
--- a/pybootchartgui/parsing.py
+++ b/pybootchartgui/parsing.py
@@ -228,7 +228,7 @@ def _parse_headers(file):
             value = line.strip()
         headers[last] += value
         return headers, last
-    return reduce(parse, file.read().decode().split('\n'), (defaultdict(str),''))[0]
+    return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0]
 
 def _parse_timed_blocks(file):
     """Parses (ie., splits) a file into so-called timed-blocks. A
@@ -242,7 +242,7 @@ def _parse_timed_blocks(file):
             return (int(lines[0]), lines[1:])
         except ValueError:
             raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
-    blocks = file.read().decode().split('\n\n')
+    blocks = file.read().decode('utf-8').split('\n\n')
     return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')]
 
 def _parse_proc_ps_log(writer, file):
@@ -491,7 +491,7 @@ def _parse_dmesg(writer, file):
     processMap['k-boot'] = kernel
     base_ts = False
     max_ts = 0
-    for line in file.read().decode().split('\n'):
+    for line in file.read().decode('utf-8').split('\n'):
         t = timestamp_re.match (line)
         if t is None:
 #                       print "duff timestamp " + line
@@ -579,7 +579,7 @@ def _parse_pacct(writer, file):
 def _parse_paternity_log(writer, file):
     parent_map = {}
     parent_map[0] = 0
-    for line in file.read().decode().split('\n'):
+    for line in file.read().decode('utf-8').split('\n'):
         elems = line.split(' ') # <Child> <Parent>
         if len (elems) >= 2:
 #                       print "paternity of %d is %d" % (int(elems[0]), int(elems[1]))
@@ -590,7 +590,7 @@ def _parse_paternity_log(writer, file):
 
 def _parse_cmdline_log(writer, file):
     cmdLines = {}
-    for block in file.read().decode().split('\n\n'):
+    for block in file.read().decode('utf-8').split('\n\n'):
         lines = block.split('\n')
         if len (lines) >= 3:
 #                       print "Lines '%s'" % (lines[0])
-- 
1.7.10.4

Bug#642343: backport of patches to fix the issue

Reply via email to