patch 9.1.1046: fuzzymatching doesn't prefer matching camelcase Commit: https://github.com/vim/vim/commit/9dfc7e5e6169594f6f4607ef1ba9dd347a9194d2 Author: glepnir <glephun...@gmail.com> Date: Tue Jan 21 22:33:13 2025 +0100
patch 9.1.1046: fuzzymatching doesn't prefer matching camelcase Problem: fuzzymatching doesn't prefer matching camelcase (Tomasz N) Solution: Add extra score when case matches (glepnir) fixes: #16434 closes: #16439 Signed-off-by: glepnir <glephun...@gmail.com> Signed-off-by: Christian Brabandt <c...@256bit.org> diff --git a/src/search.c b/src/search.c index 616331ee1..46fa7b9d9 100644 --- a/src/search.c +++ b/src/search.c @@ -42,7 +42,7 @@ static void find_mps_values(int *initc, int *findc, int *backwards, int switchit static int is_zero_width(char_u *pattern, size_t patternlen, int move, pos_T *cur, int direction); static void cmdline_search_stat(int dirc, pos_T *pos, pos_T *cursor_pos, int show_top_bot_msg, char_u *msgbuf, size_t msgbuflen, int recompute, int maxcount, long timeout); static void update_search_stat(int dirc, pos_T *pos, pos_T *cursor_pos, searchstat_T *stat, int recompute, int maxcount, long timeout); -static int fuzzy_match_compute_score(char_u *str, int strSz, int_u *matches, int numMatches); +static int fuzzy_match_compute_score(char_u *fuzpat, char_u *str, int strSz, int_u *matches, int numMatches); static int fuzzy_match_recursive(char_u *fuzpat, char_u *str, int_u strIdx, int *outScore, char_u *strBegin, int strLen, int_u *srcMatches, int_u *matches, int maxMatches, int nextMatch, int *recursionCount); #if defined(FEAT_EVAL) || defined(FEAT_PROTO) static int fuzzy_match_item_compare(const void *s1, const void *s2); @@ -4355,6 +4355,10 @@ typedef struct #define CAMEL_BONUS 30 // bonus if the first letter is matched #define FIRST_LETTER_BONUS 15 +// bonus if exact match +#define EXACT_MATCH_BONUS 100 +// bonus if case match when no ignorecase +#define CASE_MATCH_BONUS 25 // penalty applied for every letter in str before the first match #define LEADING_LETTER_PENALTY (-5) // maximum penalty for leading letters @@ -4374,6 +4378,7 @@ typedef struct */ static int fuzzy_match_compute_score( + char_u *fuzpat, char_u *str, int strSz, int_u *matches, 
@@ -4386,6 +4391,11 @@ fuzzy_match_compute_score( char_u *p = str; int_u sidx = 0; int is_exact_match = TRUE; + char_u *orig_fuzpat = fuzpat - numMatches; + char_u *curpat = orig_fuzpat; + int pat_idx = 0; + // Track consecutive camel case matches + int consecutive_camel = 0; // Initialize score score = 100; @@ -4404,6 +4414,8 @@ fuzzy_match_compute_score( for (i = 0; i < numMatches; ++i) { int_u currIdx = matches[i]; + int curr; + int is_camel = FALSE; if (i > 0) { @@ -4413,15 +4425,18 @@ fuzzy_match_compute_score( if (currIdx == (prevIdx + 1)) score += SEQUENTIAL_BONUS; else + { score += GAP_PENALTY * (currIdx - prevIdx); + // Reset consecutive camel count on gap + consecutive_camel = 0; + } } // Check for bonuses based on neighbor character value if (currIdx > 0) { // Camel case - int neighbor = ' '; - int curr; + int neighbor = ' '; if (has_mbyte) { @@ -4439,8 +4454,18 @@ fuzzy_match_compute_score( curr = str[currIdx]; } + // Enhanced camel case scoring if (vim_islower(neighbor) && vim_isupper(curr)) - score += CAMEL_BONUS; + { + score += CAMEL_BONUS * 2; // Double the camel case bonus + is_camel = TRUE; + consecutive_camel++; + // Additional bonus for consecutive camel + if (consecutive_camel > 1) + score += CAMEL_BONUS; + } + else + consecutive_camel = 0; // Bonus if the match follows a separator character if (neighbor == '/' || neighbor == '\\') @@ -4452,14 +4477,47 @@ fuzzy_match_compute_score( { // First letter score += FIRST_LETTER_BONUS; + curr = has_mbyte ? 
(*mb_ptr2char)(p) : str[currIdx]; } + + // Case matching bonus + if (vim_isalpha(curr)) + { + while (pat_idx < i && *curpat) + { + if (has_mbyte) + MB_PTR_ADV(curpat); + else + curpat++; + pat_idx++; + } + + if (has_mbyte) + { + if (curr == (*mb_ptr2char)(curpat)) + { + score += CASE_MATCH_BONUS; + // Extra bonus for exact case match in camel + if (is_camel) + score += CASE_MATCH_BONUS / 2; + } + } + else if (curr == *curpat) + { + score += CASE_MATCH_BONUS; + if (is_camel) + score += CASE_MATCH_BONUS / 2; + } + } + // Check exact match condition if (currIdx != (int_u)i) is_exact_match = FALSE; } + // Boost score for exact matches if (is_exact_match && numMatches == strSz) - score += 100; + score += EXACT_MATCH_BONUS; return score; } @@ -4563,7 +4621,7 @@ fuzzy_match_recursive( // Calculate score if (matched) - *outScore = fuzzy_match_compute_score(strBegin, strLen, matches, + *outScore = fuzzy_match_compute_score(fuzpat, strBegin, strLen, matches, nextMatch); // Return best result diff --git a/src/testdir/test_matchfuzzy.vim b/src/testdir/test_matchfuzzy.vim index e880d7335..cba08446b 100644 --- a/src/testdir/test_matchfuzzy.vim +++ b/src/testdir/test_matchfuzzy.vim @@ -96,15 +96,15 @@ endfunc " Test for the matchfuzzypos() function func Test_matchfuzzypos() - call assert_equal([['curl', 'world'], [[2,3], [2,3]], [128, 127]], matchfuzzypos(['world', 'curl'], 'rl')) - call assert_equal([['curl', 'world'], [[2,3], [2,3]], [128, 127]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) call assert_equal([['hello', 'hello world hello world'], - \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [375, 257]], + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [500, 382]], \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) - call assert_equal([['aaaaaaa'], 
[[0, 1, 2]], [191]], matchfuzzypos(['aaaaaaa'], 'aaa')) - call assert_equal([['a b'], [[0, 3]], [219]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0, 3]], [219]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0]], [112]], matchfuzzypos(['a b'], ' a ')) + call assert_equal([['aaaaaaa'], [[0, 1, 2]], [266]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0]], [137]], matchfuzzypos(['a b'], ' a ')) call assert_equal([[], [], []], matchfuzzypos(['a b'], ' ')) call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab')) let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) @@ -113,33 +113,33 @@ func Test_matchfuzzypos() call assert_equal([[], [], []], matchfuzzypos([], 'abc')) " match in a long string - call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [-135]], + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [-60]], \ matchfuzzypos([repeat('x', 300) .. 
'abc'], 'abc')) " preference for camel case match - call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [189]], matchfuzzypos(['xabcxxaBc'], 'abc')) + call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [269]], matchfuzzypos(['xabcxxaBc'], 'abc')) " preference for match after a separator (_ or space) - call assert_equal([['xabx_ab'], [[5, 6]], [145]], matchfuzzypos(['xabx_ab'], 'ab')) + call assert_equal([['xabx_ab'], [[5, 6]], [195]], matchfuzzypos(['xabx_ab'], 'ab')) " preference for leading letter match - call assert_equal([['abcxabc'], [[0, 1]], [150]], matchfuzzypos(['abcxabc'], 'ab')) + call assert_equal([['abcxabc'], [[0, 1]], [200]], matchfuzzypos(['abcxabc'], 'ab')) " preference for sequential match - call assert_equal([['aobncedone'], [[7, 8, 9]], [158]], matchfuzzypos(['aobncedone'], 'one')) + call assert_equal([['aobncedone'], [[7, 8, 9]], [233]], matchfuzzypos(['aobncedone'], 'one')) " best recursive match - call assert_equal([['xoone'], [[2, 3, 4]], [168]], matchfuzzypos(['xoone'], 'one')) + call assert_equal([['xoone'], [[2, 3, 4]], [243]], matchfuzzypos(['xoone'], 'one')) " match multiple words (separated by space) - call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [369]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) + call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo', {'matchseq': 1})) - call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [369]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz')) - call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [326]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1})) + call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz')) + call 
assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [476]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1})) call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two')) call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" ")) - call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [757]], ['grace']->matchfuzzypos('race ace grace')) + call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [1057]], ['grace']->matchfuzzypos('race ace grace')) let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [192]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]], \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [192]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]], \ matchfuzzypos(l, 'cam', {'key' : 'val'})) call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'})) @@ -154,6 +154,18 @@ func Test_matchfuzzypos() call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : test_null_string()})", 'E475:') call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') + " case match + call assert_equal([['Match', 'match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['match', 'Match'], 'Ma')) + call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['Match', 'match'], 'ma')) + " CamelCase has high weight even case match + call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc')) + call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc')) + call assert_equal(['MyTest', 'Mytest', 'mytest', ],matchfuzzy(['Mytest', 
'mytest', 'MyTest'], 'MyT')) + call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], + \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelCase')) + call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], + \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelcaseM')) + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') endfunc @@ -204,12 +216,12 @@ func Test_matchfuzzypos_mbyte() call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) " reverse the order of characters call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) - call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [222, 113]], + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [252, 143]], \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], - \ [[0, 1], [0, 1], [0, 1], [0, 2]], [151, 148, 145, 110]], + \ [[0, 1], [0, 1], [0, 1], [0, 2]], [176, 173, 170, 135]], \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) - call assert_equal([['ααααααα'], [[0, 1, 2]], [191]], + call assert_equal([['ααααααα'], [[0, 1, 2]], [216]], \ matchfuzzypos(['ααααααα'], 'ααα')) call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) @@ -222,10 +234,10 @@ func Test_matchfuzzypos_mbyte() call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘')) " match in a long string - call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [-135]], + call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [-110]], \ matchfuzzypos([repeat('ぶ', 300) .. 
'ẼẼẼ'], 'ẼẼẼ')) " preference for camel case match - call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [189]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) + call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [219]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) " preference for match after a separator (_ or space) call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) " preference for leading letter match diff --git a/src/version.c b/src/version.c index fb5fd376e..ad8a7480b 100644 --- a/src/version.c +++ b/src/version.c @@ -704,6 +704,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1046, /**/ 1045, /**/ -- -- You received this message from the "vim_dev" maillist. Do not top-post! Type your reply below the text you are replying to. For more information, visit http://www.vim.org/maillist.php --- You received this message because you are subscribed to the Google Groups "vim_dev" group. To unsubscribe from this group and stop receiving emails from it, send an email to vim_dev+unsubscr...@googlegroups.com. To view this discussion visit https://groups.google.com/d/msgid/vim_dev/E1taM3T-00AAJV-8o%40256bit.org.