tags 615817 + patch
thanks

Hi!
I am trying the attached patch. I will keep you posted on my results.
Index: /sandbox/spam-filter-captcha/tracspamfilter/filters/bayes.py
===================================================================
--- spam-filter-captcha/tracspamfilter/filters/bayes.py (revision 4731)
+++ spam-filter-captcha/tracspamfilter/filters/bayes.py (revision 8319)
@@ -104,5 +104,10 @@
         self.nspam = self.nham = 0
 
+    def _sanitize(self, text):
+        """Remove invalid byte sequences from utf-8 encoded text"""
+        return text.decode('utf-8', 'ignore')
+
     def _get_row(self, word):
+        word = self._sanitize(word)
         cursor = self.db.cursor()
         cursor.execute("SELECT nspam,nham FROM spamfilter_bayes WHERE word=%s",
@@ -111,8 +116,8 @@
         if not row:
             return {}
-
         return {'nspam': row[0], 'nham': row[1]}
 
     def _set_row(self, word, nspam, nham):
+        word = self._sanitize(word)
         cursor = self.db.cursor()
         if self._has_key(word):
@@ -125,4 +130,5 @@
 
     def _delete_row(self, word):
+        word = self._sanitize(word)
         cursor = self.db.cursor()
         cursor.execute("DELETE FROM spamfilter_bayes WHERE word=%s", (word,))
@@ -130,4 +136,5 @@
 
     def _has_key(self, key):
+        key = self._sanitize(key)
         cursor = self.db.cursor()
         cursor.execute("SELECT COUNT(*) FROM spamfilter_bayes WHERE word=%s",
Index: /plugins/0.12/spam-filter-captcha/tracspamfilter/filters/bayes.py
===================================================================
--- spam-filter-captcha/tracspamfilter/filters/bayes.py (revision 9932)
+++ spam-filter-captcha/tracspamfilter/filters/bayes.py (revision 9933)
@@ -76,5 +76,5 @@
 
         hammie = self._get_hammie()
-        hammie.train(content.encode('utf-8'), spam)
+        hammie.train(content.encode('utf-8','ignore'), spam)
         hammie.store()
 
@@ -108,4 +108,6 @@
 
     def _sanitize(self, text):
+        if isinstance(text, unicode):
+            return text
         """Remove invalid byte sequences from utf-8 encoded text"""
         return text.decode('utf-8', 'ignore')
@@ -154,7 +156,4 @@
 
     def _wordinfoget(self, word):
-        if isinstance(word, unicode):
-            word = word.encode("utf-8")
-
         row = self._get_row(word)
         if row:
-- Let the machine do the dirty work. - The Elements of Programming Style (Kernighan & Plauger)