commit:     66df1d045a64f8ad6453d9668cdb66980c128b69
Author:     Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Sat Jul  8 19:44:40 2017 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sun Jul  9 17:18:03 2017 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=66df1d04

fuzzy search: weigh category similarity independently (bug 623648)

Weigh the similarity of category and package names independently,
in order to avoid matching lots of irrelevant packages in the same
category when the package name is much shorter than the category
name.

X-Gentoo-bug: 623648
X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=623648
Acked-by: Brian Dolbec <dolsen <AT> gentoo.org>

 pym/_emerge/search.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py
index 20a0c026e..dc91ad315 100644
--- a/pym/_emerge/search.py
+++ b/pym/_emerge/search.py
@@ -264,15 +264,33 @@ class search(object):
                        if self.fuzzy:
                                fuzzy = True
                                cutoff = float(self.search_similarity) / 100
-                               seq_match = difflib.SequenceMatcher()
-                               seq_match.set_seq2(self.searchkey.lower())
+                               if match_category:
+                                       # Weigh the similarity of category and package
+                                       # names independently, in order to avoid matching
+                                       # lots of irrelevant packages in the same category
+                                       # when the package name is much shorter than the
+                                       # category name.
+                                       part_split = portage.catsplit
+                               else:
+                                       part_split = lambda match_string: (match_string,)
 
-                               def fuzzy_search(match_string):
+                               part_matchers = []
+                               for part in part_split(self.searchkey):
+                                       seq_match = difflib.SequenceMatcher()
+                                       seq_match.set_seq2(part.lower())
+                                       part_matchers.append(seq_match)
+
+                               def fuzzy_search_part(seq_match, match_string):
                                        seq_match.set_seq1(match_string.lower())
                                        return (seq_match.real_quick_ratio() >= cutoff and
                                                seq_match.quick_ratio() >= cutoff and
                                                seq_match.ratio() >= cutoff)
 
+                               def fuzzy_search(match_string):
+                                       return all(fuzzy_search_part(seq_match, part)
+                                               for seq_match, part in zip(
+                                               part_matchers, part_split(match_string)))
+
                for package in self._cp_all():
                        self._spinner_update()
 

Reply via email to