Control: tags 1084323 + pending

Dear maintainer,

I've prepared an NMU for nltk (versioned as 3.9.1-1.1) using the patch I
sent a few days ago (the upstream fix from
https://github.com/nltk/nltk/pull/3309), and uploaded it to DELAYED/5.
The full debdiff is included below.  Please feel free to tell me if I
should delay it longer.

Regards,

-- 
Colin Watson (he/him)                              [cjwat...@debian.org]
diff -Nru nltk-3.9.1/debian/changelog nltk-3.9.1/debian/changelog
--- nltk-3.9.1/debian/changelog	2024-10-02 07:03:25.000000000 +0100
+++ nltk-3.9.1/debian/changelog	2024-10-11 15:23:33.000000000 +0100
@@ -1,3 +1,10 @@
+nltk (3.9.1-1.1) unstable; urgency=medium
+
+  * Non-maintainer upload.
+  * Don't read the WordNet corpus before it is needed (closes: #1084323).
+
+ -- Colin Watson <cjwat...@debian.org>  Fri, 11 Oct 2024 15:23:33 +0100
+
 nltk (3.9.1-1) unstable; urgency=medium
 
   * New upstream version 3.9.1 (Closes: #1074423)
diff -Nru nltk-3.9.1/debian/patches/import-wordnet-corpus-lazily.patch nltk-3.9.1/debian/patches/import-wordnet-corpus-lazily.patch
--- nltk-3.9.1/debian/patches/import-wordnet-corpus-lazily.patch	1970-01-01 01:00:00.000000000 +0100
+++ nltk-3.9.1/debian/patches/import-wordnet-corpus-lazily.patch	2024-10-11 15:23:33.000000000 +0100
@@ -0,0 +1,122 @@
+From: Eric Kafe <kafe.e...@gmail.com>
+Date: Sun, 18 Aug 2024 16:09:01 +0200
+Subject: Fix bug in WordNetLemmatizer
+
+Fix #3308 by not importing WordNet's _morphy and morphy before they are needed.
+
+Origin: upstream, https://github.com/nltk/nltk/pull/3309
+Bug: https://github.com/nltk/nltk/issues/3308
+Bug-Debian: https://bugs.debian.org/1084323
+Last-Update: 2024-10-08
+---
+ nltk/stem/wordnet.py | 71 +++++++++++++++++++++++++++++-----------------------
+ 1 file changed, 39 insertions(+), 32 deletions(-)
+
+diff --git a/nltk/stem/wordnet.py b/nltk/stem/wordnet.py
+index 76caf1b..87d08c7 100644
+--- a/nltk/stem/wordnet.py
++++ b/nltk/stem/wordnet.py
+@@ -7,64 +7,71 @@
+ # URL: <https://www.nltk.org/>
+ # For license information, see LICENSE.TXT
+ 
+-from nltk.corpus import wordnet as wn
+-
+ 
+ class WordNetLemmatizer:
+     """
+     WordNet Lemmatizer
+ 
+-    Provides 3 lemmatizer modes:
+-
+-    1. _morphy() is an alias to WordNet's _morphy lemmatizer.
+-    It returns a list of all lemmas found in WordNet.
+-
+-    >>> wnl = WordNetLemmatizer()
+-    >>> print(wnl._morphy('us', 'n'))
+-    ['us', 'u']
+-
+-    2. morphy() is a restrictive wrapper around _morphy().
+-    It returns the first lemma found in WordNet,
+-    or None if no lemma is found.
++    Provides 3 lemmatizer modes: _morphy(), morphy() and lemmatize().
+ 
+-    >>> print(wnl.morphy('us', 'n'))
+-    us
+-
+-    >>> print(wnl.morphy('catss'))
+-    None
+-
+-    3. lemmatize() is a permissive wrapper around _morphy().
++    lemmatize() is a permissive wrapper around _morphy().
+     It returns the shortest lemma found in WordNet,
+     or the input string unchanged if nothing is found.
+ 
+-    >>> print(wnl.lemmatize('us', 'n'))
++    >>> from nltk.stem import WordNetLemmatizer as wnl
++    >>> print(wnl().lemmatize('us', 'n'))
+     u
+ 
+-    >>> print(wnl.lemmatize('Anythinggoeszxcv'))
++    >>> print(wnl().lemmatize('Anythinggoeszxcv'))
+     Anythinggoeszxcv
+ 
+     """
+ 
+-    morphy = wn.morphy
++    def _morphy(self, form, pos, check_exceptions=True):
++        """
++        _morphy() is WordNet's _morphy lemmatizer.
++        It returns a list of all lemmas found in WordNet.
++
++        >>> from nltk.stem import WordNetLemmatizer as wnl
++        >>> print(wnl()._morphy('us', 'n'))
++        ['us', 'u']
++        """
++        from nltk.corpus import wordnet as wn
++
++        return wn._morphy(form, pos, check_exceptions)
++
++    def morphy(self, form, pos=None, check_exceptions=True):
++        """
++        morphy() is a restrictive wrapper around _morphy().
++        It returns the first lemma found in WordNet,
++        or None if no lemma is found.
++
++        >>> from nltk.stem import WordNetLemmatizer as wnl
++        >>> print(wnl().morphy('us', 'n'))
++        us
++
++        >>> print(wnl().morphy('catss'))
++        None
++        """
++        from nltk.corpus import wordnet as wn
+ 
+-    _morphy = wn._morphy
++        return wn.morphy(form, pos, check_exceptions)
+ 
+     def lemmatize(self, word: str, pos: str = "n") -> str:
+         """Lemmatize `word` by picking the shortest of the possible lemmas,
+         using the wordnet corpus reader's built-in _morphy function.
+         Returns the input word unchanged if it cannot be found in WordNet.
+ 
+-        >>> from nltk.stem import WordNetLemmatizer
+-        >>> wnl = WordNetLemmatizer()
+-        >>> print(wnl.lemmatize('dogs'))
++        >>> from nltk.stem import WordNetLemmatizer as wnl
++        >>> print(wnl().lemmatize('dogs'))
+         dog
+-        >>> print(wnl.lemmatize('churches'))
++        >>> print(wnl().lemmatize('churches'))
+         church
+-        >>> print(wnl.lemmatize('aardwolves'))
++        >>> print(wnl().lemmatize('aardwolves'))
+         aardwolf
+-        >>> print(wnl.lemmatize('abaci'))
++        >>> print(wnl().lemmatize('abaci'))
+         abacus
+-        >>> print(wnl.lemmatize('hardrock'))
++        >>> print(wnl().lemmatize('hardrock'))
+         hardrock
+ 
+         :param word: The input word to lemmatize.
diff -Nru nltk-3.9.1/debian/patches/series nltk-3.9.1/debian/patches/series
--- nltk-3.9.1/debian/patches/series	1970-01-01 01:00:00.000000000 +0100
+++ nltk-3.9.1/debian/patches/series	2024-10-11 15:23:33.000000000 +0100
@@ -0,0 +1 @@
+import-wordnet-corpus-lazily.patch

Reply via email to