Control: tags 1084323 + pending

Dear maintainer,

I've prepared an NMU for nltk (versioned as 3.9.1-1.1) using the patch I sent a few days ago, and uploaded it to DELAYED/5. Please feel free to tell me if I should delay it longer.

Regards,

--
Colin Watson (he/him) [cjwat...@debian.org]

diff -Nru nltk-3.9.1/debian/changelog nltk-3.9.1/debian/changelog --- nltk-3.9.1/debian/changelog 2024-10-02 07:03:25.000000000 +0100 +++ nltk-3.9.1/debian/changelog 2024-10-11 15:23:33.000000000 +0100 @@ -1,3 +1,10 @@ +nltk (3.9.1-1.1) unstable; urgency=medium + + * Non-maintainer upload. + * Don't read the WordNet corpus before it is needed (closes: #1084323). + + -- Colin Watson <cjwat...@debian.org> Fri, 11 Oct 2024 15:23:33 +0100 + nltk (3.9.1-1) unstable; urgency=medium * New upstream version 3.9.1 (Closes: #1074423) diff -Nru nltk-3.9.1/debian/patches/import-wordnet-corpus-lazily.patch nltk-3.9.1/debian/patches/import-wordnet-corpus-lazily.patch --- nltk-3.9.1/debian/patches/import-wordnet-corpus-lazily.patch 1970-01-01 01:00:00.000000000 +0100 +++ nltk-3.9.1/debian/patches/import-wordnet-corpus-lazily.patch 2024-10-11 15:23:33.000000000 +0100 @@ -0,0 +1,122 @@ +From: Eric Kafe <kafe.e...@gmail.com> +Date: Sun, 18 Aug 2024 16:09:01 +0200 +Subject: Fix bug in WordNetLemmatizer + +Fix #3308 by not importing WordNet's _morphy and morphy before they are needed. + +Origin: upstream, https://github.com/nltk/nltk/pull/3309 +Bug: https://github.com/nltk/nltk/issues/3308 +Bug-Debian: https://bugs.debian.org/1084323 +Last-Update: 2024-10-08 +--- + nltk/stem/wordnet.py | 71 +++++++++++++++++++++++++++++----------------------- + 1 file changed, 39 insertions(+), 32 deletions(-) + +diff --git a/nltk/stem/wordnet.py b/nltk/stem/wordnet.py +index 76caf1b..87d08c7 100644 +--- a/nltk/stem/wordnet.py ++++ b/nltk/stem/wordnet.py +@@ -7,64 +7,71 @@ + # URL: <https://www.nltk.org/> + # For license information, see LICENSE.TXT + +-from nltk.corpus import wordnet as wn +- + + class WordNetLemmatizer: + """ + WordNet Lemmatizer + +- Provides 3 lemmatizer modes: +- +- 1. _morphy() is an alias to WordNet's _morphy lemmatizer. +- It returns a list of all lemmas found in WordNet. +- +- >>> wnl = WordNetLemmatizer() +- >>> print(wnl._morphy('us', 'n')) +- ['us', 'u'] +- +- 2. 
morphy() is a restrictive wrapper around _morphy(). +- It returns the first lemma found in WordNet, +- or None if no lemma is found. ++ Provides 3 lemmatizer modes: _morphy(), morphy() and lemmatize(). + +- >>> print(wnl.morphy('us', 'n')) +- us +- +- >>> print(wnl.morphy('catss')) +- None +- +- 3. lemmatize() is a permissive wrapper around _morphy(). ++ lemmatize() is a permissive wrapper around _morphy(). + It returns the shortest lemma found in WordNet, + or the input string unchanged if nothing is found. + +- >>> print(wnl.lemmatize('us', 'n')) ++ >>> from nltk.stem import WordNetLemmatizer as wnl ++ >>> print(wnl().lemmatize('us', 'n')) + u + +- >>> print(wnl.lemmatize('Anythinggoeszxcv')) ++ >>> print(wnl().lemmatize('Anythinggoeszxcv')) + Anythinggoeszxcv + + """ + +- morphy = wn.morphy ++ def _morphy(self, form, pos, check_exceptions=True): ++ """ ++ _morphy() is WordNet's _morphy lemmatizer. ++ It returns a list of all lemmas found in WordNet. ++ ++ >>> from nltk.stem import WordNetLemmatizer as wnl ++ >>> print(wnl()._morphy('us', 'n')) ++ ['us', 'u'] ++ """ ++ from nltk.corpus import wordnet as wn ++ ++ return wn._morphy(form, pos, check_exceptions) ++ ++ def morphy(self, form, pos=None, check_exceptions=True): ++ """ ++ morphy() is a restrictive wrapper around _morphy(). ++ It returns the first lemma found in WordNet, ++ or None if no lemma is found. ++ ++ >>> from nltk.stem import WordNetLemmatizer as wnl ++ >>> print(wnl().morphy('us', 'n')) ++ us ++ ++ >>> print(wnl().morphy('catss')) ++ None ++ """ ++ from nltk.corpus import wordnet as wn + +- _morphy = wn._morphy ++ return wn.morphy(form, pos, check_exceptions) + + def lemmatize(self, word: str, pos: str = "n") -> str: + """Lemmatize `word` by picking the shortest of the possible lemmas, + using the wordnet corpus reader's built-in _morphy function. + Returns the input word unchanged if it cannot be found in WordNet. 
+ +- >>> from nltk.stem import WordNetLemmatizer +- >>> wnl = WordNetLemmatizer() +- >>> print(wnl.lemmatize('dogs')) ++ >>> from nltk.stem import WordNetLemmatizer as wnl ++ >>> print(wnl().lemmatize('dogs')) + dog +- >>> print(wnl.lemmatize('churches')) ++ >>> print(wnl().lemmatize('churches')) + church +- >>> print(wnl.lemmatize('aardwolves')) ++ >>> print(wnl().lemmatize('aardwolves')) + aardwolf +- >>> print(wnl.lemmatize('abaci')) ++ >>> print(wnl().lemmatize('abaci')) + abacus +- >>> print(wnl.lemmatize('hardrock')) ++ >>> print(wnl().lemmatize('hardrock')) + hardrock + + :param word: The input word to lemmatize. diff -Nru nltk-3.9.1/debian/patches/series nltk-3.9.1/debian/patches/series --- nltk-3.9.1/debian/patches/series 1970-01-01 01:00:00.000000000 +0100 +++ nltk-3.9.1/debian/patches/series 2024-10-11 15:23:33.000000000 +0100 @@ -0,0 +1 @@ +import-wordnet-corpus-lazily.patch