Package: python3-whoosh
Version: 2.7.4+git6-g9134ad92-8
Severity: normal
Tags: patch
Dear Maintainer,

I have whoosh installed as a dependency for the mailman3 suite. Whenever it
runs I see:

/usr/lib/python3/dist-packages/whoosh/analysis/filters.py:50: SyntaxWarning: invalid escape sequence '\w'
  url_pattern = rcompile("""
/usr/lib/python3/dist-packages/whoosh/analysis/filters.py:148: SyntaxWarning: invalid escape sequence '\S'
  """Interleaves the results of two or more filters (or filter chains).
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:37: SyntaxWarning: invalid escape sequence '\S'
  """Given a set of words (or any object with a ``__contains__`` method),
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:224: SyntaxWarning: invalid escape sequence '\S'
  """Splits words into subwords and performs optional transformations on
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:285: SyntaxWarning: invalid escape sequence '\|'
  def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"),
/usr/lib/python3/dist-packages/whoosh/codec/whoosh3.py:1116: SyntaxWarning: "is" with 'int' literal. Did you mean "=="?
  elif fixedsize is 0:

I get my email inbox spammed with this once an hour as part of the indexing
process for mailman3-web.

-- System Information:
Debian Release: trixie/sid
  APT prefers stable-updates
  APT policy: (500, 'stable-updates'), (500, 'stable-security'), (500, 'unstable'), (500, 'stable')
Architecture: amd64 (x86_64)

Kernel: Linux 6.10.4-cloud-amd64 (SMP w/1 CPU thread; PREEMPT)
Locale: LANG=en_AU.UTF-8, LC_CTYPE=en_AU.UTF-8 (charmap=UTF-8), LANGUAGE not set
Shell: /bin/sh linked to /usr/bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages python3-whoosh depends on:
ii  python3  3.12.5-1

python3-whoosh recommends no packages.

Versions of packages python3-whoosh suggests:
pn  python-whoosh-doc  <none>

-- no debconf information
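For reference, the warnings fall into two classes, both addressed by the
attached patch. A minimal illustration (hypothetical snippet, not taken from
the whoosh sources):

    import re

    # In a plain string literal the backslash is an escape character, so "\w"
    # is an invalid escape sequence and Python 3.12 emits a SyntaxWarning when
    # the module is byte-compiled. A raw string (or a doubled backslash) keeps
    # the backslash and avoids the warning.
    word_re = re.compile(r"^\w+")
    print(word_re.match("hello world").group())   # -> hello

    # Identity comparison against an int literal ("fixedsize is 0") only works
    # because CPython happens to cache small ints; a value comparison is the
    # correct form and avoids the SyntaxWarning.
    fixedsize = 0
    print(fixedsize == 0)                          # -> True

The patch below switches the affected regex strings to raw strings (or doubles
the backslashes inside docstrings) and replaces the "is 0" check with "== 0".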
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/filters.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/analysis/filters.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/filters.py
@@ -47,11 +47,11 @@ STOP_WORDS = frozenset(('a', 'an', 'and'
 
 
 # Simple pattern for filtering URLs, may be useful
-url_pattern = rcompile("""
+url_pattern = rcompile(r"""
 (
     [A-Za-z+]+://          # URL protocol
-    \\S+?                   # URL body
-    (?=\\s|[.]\\s|$|[.]$)    # Stop at space/end, or a dot followed by space/end
+    \S+?                   # URL body
+    (?=\s|[.]\s|$|[.]$)    # Stop at space/end, or a dot followed by space/end
 ) | (                      # or...
     \w+([:.]?\w+)*         # word characters, with opt. internal colons/dots
 )
@@ -155,7 +155,7 @@ class TeeFilter(Filter):
     >>> f1 = LowercaseFilter()
     >>> # In the other branch, we'll reverse the tokens
     >>> f2 = ReverseTextFilter()
-    >>> ana = RegexTokenizer(r"\S+") | TeeFilter(f1, f2)
+    >>> ana = RegexTokenizer(r"\\S+") | TeeFilter(f1, f2)
     >>> [token.text for token in ana(target)]
     ["alfa", "AFLA", "bravo", "OVARB", "charlie", "EILRAHC"]
 
@@ -164,7 +164,7 @@ class TeeFilter(Filter):
 
     >>> f1 = PassFilter()
     >>> f2 = BiWordFilter()
-    >>> ana = RegexTokenizer(r"\S+") | TeeFilter(f1, f2) | LowercaseFilter()
+    >>> ana = RegexTokenizer(r"\\S+") | TeeFilter(f1, f2) | LowercaseFilter()
     >>> [token.text for token in ana(target)]
     ["alfa", "alfa-bravo", "bravo", "bravo-charlie", "charlie"]
     """
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/intraword.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/analysis/intraword.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/intraword.py
@@ -46,7 +46,7 @@ class CompoundWordFilter(Filter):
     compound word in the token stream along with the word segments.
 
     >>> cwf = CompoundWordFilter(wordset, keep_compound=True)
-    >>> analyzer = RegexTokenizer(r"\S+") | cwf
+    >>> analyzer = RegexTokenizer(r"\\S+") | cwf
     >>> [t.text for t in analyzer("I do not like greeneggs and ham")
     ["I", "do", "not", "like", "greeneggs", "green", "eggs", "and", "ham"]
     >>> cwf.keep_compound = False
@@ -221,7 +221,7 @@ class ShingleFilter(Filter):
 
 
 class IntraWordFilter(Filter):
-    """Splits words into subwords and performs optional transformations on
+    r"""Splits words into subwords and performs optional transformations on
     subword groups. This filter is funtionally based on yonik's
     WordDelimiterFilter in Solr, but shares no code with it.
 
@@ -272,7 +272,7 @@ class IntraWordFilter(Filter):
     >>> iwf_i = IntraWordFilter(mergewords=True, mergenums=True)
     >>> iwf_q = IntraWordFilter(mergewords=False, mergenums=False)
     >>> iwf = MultiFilter(index=iwf_i, query=iwf_q)
-    >>> analyzer = RegexTokenizer(r"\S+") | iwf | LowercaseFilter()
+    >>> analyzer = RegexTokenizer(r"\\S+") | iwf | LowercaseFilter()
 
     (See :class:`MultiFilter`.)
     """
@@ -282,7 +282,7 @@ class IntraWordFilter(Filter):
     __inittypes__ = dict(delims=text_type, splitwords=bool, splitnums=bool,
                          mergewords=bool, mergenums=bool)
 
-    def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"),
+    def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+"),
                  splitwords=True, splitnums=True,
                  mergewords=False, mergenums=False):
         """
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/codec/whoosh3.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/codec/whoosh3.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/codec/whoosh3.py
@@ -1113,7 +1113,7 @@ class W3LeafMatcher(LeafMatcher):
         vs = self._data[2]
         if fixedsize is None or fixedsize < 0:
             self._values = vs
-        elif fixedsize is 0:
+        elif fixedsize == 0:
             self._values = (None,) * self._blocklength
         else:
             assert isinstance(vs, bytes_type)
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/paicehusk.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/paicehusk.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/paicehusk.py
@@ -30,7 +30,7 @@ class PaiceHuskStemmer(object):
                       (?P<cont>[.>])
                       """, re.UNICODE | re.VERBOSE)
 
-    stem_expr = re.compile("^\w+", re.UNICODE)
+    stem_expr = re.compile(r"^\w+", re.UNICODE)
 
     def __init__(self, ruletable):
         """
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/porter.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter.py
@@ -64,14 +64,14 @@ _c_v = re.compile("^" + _cons_seq + _vow
 
 # Patterns used in the rules
 
-_ed_ing = re.compile("^(.*)(ed|ing)$")
-_at_bl_iz = re.compile("(at|bl|iz)$")
-_step1b = re.compile("([^aeiouylsz])\\1$")
-_step2 = re.compile("^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$")
-_step3 = re.compile("^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$")
-_step4_1 = re.compile("^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$")
-_step4_2 = re.compile("^(.+?)(s|t)(ion)$")
-_step5 = re.compile("^(.+?)e$")
+_ed_ing = re.compile(r"^(.*)(ed|ing)$")
+_at_bl_iz = re.compile(r"(at|bl|iz)$")
+_step1b = re.compile(r"([^aeiouylsz])\1$")
+_step2 = re.compile(r"^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$")
+_step3 = re.compile(r"^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$")
+_step4_1 = re.compile(r"^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$")
+_step4_2 = re.compile(r"^(.+?)(s|t)(ion)$")
+_step5 = re.compile(r"^(.+?)e$")
 
 
 # Stemming function
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter2.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/porter2.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter2.py
@@ -64,7 +64,7 @@ def remove_initial_apostrophe(word):
 def capitalize_consonant_ys(word):
     if word.startswith('y'):
         word = 'Y' + word[1:]
-    return ccy_exp.sub('\g<1>Y', word)
+    return ccy_exp.sub(r'\g<1>Y', word)
 
 
 def step_0(word):