Package: python3-whoosh
Version: 2.7.4+git6-g9134ad92-8
Severity: normal
Tags: patch
Dear Maintainer,

I have whoosh installed as a dependency for the mailman3 suite. Whenever it
runs I see:

/usr/lib/python3/dist-packages/whoosh/analysis/filters.py:50: SyntaxWarning: invalid escape sequence '\w'
  url_pattern = rcompile("""
/usr/lib/python3/dist-packages/whoosh/analysis/filters.py:148: SyntaxWarning: invalid escape sequence '\S'
  """Interleaves the results of two or more filters (or filter chains).
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:37: SyntaxWarning: invalid escape sequence '\S'
  """Given a set of words (or any object with a ``__contains__`` method),
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:224: SyntaxWarning: invalid escape sequence '\S'
  """Splits words into subwords and performs optional transformations on
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:285: SyntaxWarning: invalid escape sequence '\|'
  def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"),
/usr/lib/python3/dist-packages/whoosh/codec/whoosh3.py:1116: SyntaxWarning: "is" with 'int' literal. Did you mean "=="?
  elif fixedsize is 0:

I get my email inbox spammed with this once an hour as part of the indexing
process for mailman3-web.

-- System Information:
Debian Release: trixie/sid
  APT prefers stable-updates
  APT policy: (500, 'stable-updates'), (500, 'stable-security'), (500, 'unstable'), (500, 'stable')
Architecture: amd64 (x86_64)

Kernel: Linux 6.10.4-cloud-amd64 (SMP w/1 CPU thread; PREEMPT)
Locale: LANG=en_AU.UTF-8, LC_CTYPE=en_AU.UTF-8 (charmap=UTF-8), LANGUAGE not set
Shell: /bin/sh linked to /usr/bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages python3-whoosh depends on:
ii  python3  3.12.5-1

python3-whoosh recommends no packages.

Versions of packages python3-whoosh suggests:
pn  python-whoosh-doc  <none>

-- no debconf information
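For reference, the warnings fall into two classes, both addressed by the
attached patch. A minimal illustration (hypothetical snippet, not taken from
the whoosh sources):

    import re

    # In a plain string literal the backslash is an escape character, so "\w"
    # is an invalid escape sequence and Python 3.12 emits a SyntaxWarning when
    # the module is byte-compiled. A raw string (or a doubled backslash) keeps
    # the backslash and avoids the warning.
    word_re = re.compile(r"^\w+")
    print(word_re.match("hello world").group())   # -> hello

    # Identity comparison against an int literal ("fixedsize is 0") only works
    # because CPython happens to cache small ints; a value comparison is the
    # correct form and avoids the SyntaxWarning.
    fixedsize = 0
    print(fixedsize == 0)                          # -> True

The patch below switches the affected regex strings to raw strings (or doubles
the backslashes inside docstrings) and replaces the "is 0" check with "== 0".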
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/filters.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/analysis/filters.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/filters.py
@@ -47,11 +47,11 @@ STOP_WORDS = frozenset(('a', 'an', 'and'
 
 
 # Simple pattern for filtering URLs, may be useful
-url_pattern = rcompile("""
+url_pattern = rcompile(r"""
 (
     [A-Za-z+]+://          # URL protocol
-    \\S+?                   # URL body
-    (?=\\s|[.]\\s|$|[.]$)    # Stop at space/end, or a dot followed by space/end
+    \S+?                   # URL body
+    (?=\s|[.]\s|$|[.]$)    # Stop at space/end, or a dot followed by space/end
 ) | (                      # or...
     \w+([:.]?\w+)*         # word characters, with opt. internal colons/dots
 )
@@ -155,7 +155,7 @@ class TeeFilter(Filter):
     >>> f1 = LowercaseFilter()
     >>> # In the other branch, we'll reverse the tokens
     >>> f2 = ReverseTextFilter()
-    >>> ana = RegexTokenizer(r"\S+") | TeeFilter(f1, f2)
+    >>> ana = RegexTokenizer(r"\\S+") | TeeFilter(f1, f2)
     >>> [token.text for token in ana(target)]
     ["alfa", "AFLA", "bravo", "OVARB", "charlie", "EILRAHC"]
 
@@ -164,7 +164,7 @@ class TeeFilter(Filter):
 
     >>> f1 = PassFilter()
     >>> f2 = BiWordFilter()
-    >>> ana = RegexTokenizer(r"\S+") | TeeFilter(f1, f2) | LowercaseFilter()
+    >>> ana = RegexTokenizer(r"\\S+") | TeeFilter(f1, f2) | LowercaseFilter()
     >>> [token.text for token in ana(target)]
     ["alfa", "alfa-bravo", "bravo", "bravo-charlie", "charlie"]
     """
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/intraword.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/analysis/intraword.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/intraword.py
@@ -46,7 +46,7 @@ class CompoundWordFilter(Filter):
     compound word in the token stream along with the word segments.
 
     >>> cwf = CompoundWordFilter(wordset, keep_compound=True)
-    >>> analyzer = RegexTokenizer(r"\S+") | cwf
+    >>> analyzer = RegexTokenizer(r"\\S+") | cwf
     >>> [t.text for t in analyzer("I do not like greeneggs and ham")
     ["I", "do", "not", "like", "greeneggs", "green", "eggs", "and", "ham"]
     >>> cwf.keep_compound = False
@@ -221,7 +221,7 @@ class ShingleFilter(Filter):
 
 
 class IntraWordFilter(Filter):
-    """Splits words into subwords and performs optional transformations on
+    r"""Splits words into subwords and performs optional transformations on
     subword groups. This filter is funtionally based on yonik's
     WordDelimiterFilter in Solr, but shares no code with it.
 
@@ -272,7 +272,7 @@ class IntraWordFilter(Filter):
     >>> iwf_i = IntraWordFilter(mergewords=True, mergenums=True)
     >>> iwf_q = IntraWordFilter(mergewords=False, mergenums=False)
     >>> iwf = MultiFilter(index=iwf_i, query=iwf_q)
-    >>> analyzer = RegexTokenizer(r"\S+") | iwf | LowercaseFilter()
+    >>> analyzer = RegexTokenizer(r"\\S+") | iwf | LowercaseFilter()
 
     (See :class:`MultiFilter`.)
     """
@@ -282,7 +282,7 @@ class IntraWordFilter(Filter):
     __inittypes__ = dict(delims=text_type, splitwords=bool, splitnums=bool,
                          mergewords=bool, mergenums=bool)
 
-    def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"),
+    def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+"),
                  splitwords=True, splitnums=True,
                  mergewords=False, mergenums=False):
         """
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/codec/whoosh3.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/codec/whoosh3.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/codec/whoosh3.py
@@ -1113,7 +1113,7 @@ class W3LeafMatcher(LeafMatcher):
         vs = self._data[2]
         if fixedsize is None or fixedsize < 0:
             self._values = vs
-        elif fixedsize is 0:
+        elif fixedsize == 0:
             self._values = (None,) * self._blocklength
         else:
             assert isinstance(vs, bytes_type)
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/paicehusk.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/paicehusk.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/paicehusk.py
@@ -30,7 +30,7 @@ class PaiceHuskStemmer(object):
                       (?P<cont>[.>])
                       """, re.UNICODE | re.VERBOSE)
 
-    stem_expr = re.compile("^\w+", re.UNICODE)
+    stem_expr = re.compile(r"^\w+", re.UNICODE)
 
     def __init__(self, ruletable):
         """
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/porter.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter.py
@@ -64,14 +64,14 @@ _c_v = re.compile("^" + _cons_seq + _vow
 
 # Patterns used in the rules
 
-_ed_ing = re.compile("^(.*)(ed|ing)$")
-_at_bl_iz = re.compile("(at|bl|iz)$")
-_step1b = re.compile("([^aeiouylsz])\\1$")
-_step2 = re.compile("^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$")
-_step3 = re.compile("^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$")
-_step4_1 = re.compile("^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$")
-_step4_2 = re.compile("^(.+?)(s|t)(ion)$")
-_step5 = re.compile("^(.+?)e$")
+_ed_ing = re.compile(r"^(.*)(ed|ing)$")
+_at_bl_iz = re.compile(r"(at|bl|iz)$")
+_step1b = re.compile(r"([^aeiouylsz])\1$")
+_step2 = re.compile(r"^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$")
+_step3 = re.compile(r"^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$")
+_step4_1 = re.compile(r"^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$")
+_step4_2 = re.compile(r"^(.+?)(s|t)(ion)$")
+_step5 = re.compile(r"^(.+?)e$")
 
 
 # Stemming function
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter2.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/porter2.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter2.py
@@ -64,7 +64,7 @@ def remove_initial_apostrophe(word):
 def capitalize_consonant_ys(word):
     if word.startswith('y'):
         word = 'Y' + word[1:]
-    return ccy_exp.sub('\g<1>Y', word)
+    return ccy_exp.sub(r'\g<1>Y', word)
 
 
 def step_0(word):