branch: externals/pyim commit 6eb5c47b2fcd64944958edd095d4f2c2f18a8526 Author: Feng Shu <tuma...@163.com> Commit: Feng Shu <tuma...@163.com>
Add pyim-cregexp-build-xingma-regexp-from-words --- pyim-cregexp.el | 59 +++++++++++++++++++++++++++++------------------------ tests/pyim-tests.el | 5 ++++- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/pyim-cregexp.el b/pyim-cregexp.el index 095dba6336..d8ff011ec1 100644 --- a/pyim-cregexp.el +++ b/pyim-cregexp.el @@ -210,34 +210,39 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子 (imobj (scheme pyim-scheme-xingma) &optional match-beginning first-equal _all-equal _char-level-num) "从 IMOBJ 创建一个搜索中文的 regexp, 适用于形码输入法。" - (cl-flet ((build-regexp - (list) - (let* ((n (apply #'max (mapcar #'length list))) - results) - (dotimes (i n) - (push (format "[%s]%s" - (mapconcat - (lambda (x) - (if (> i (- (length x) 1)) - "" - (char-to-string - (elt x i)))) - list "") - (if (> i 0) "?" "")) - results)) - (string-join (reverse results))))) - (let* ((code-prefix (pyim-scheme-code-prefix scheme)) - (regexp (mapconcat - (lambda (x) - (let ((code (concat (or code-prefix "") - (if first-equal - (substring x 0 1) - x)))) - (build-regexp (pyim-dcache-get code '(code2word))))) - imobj ""))) - (unless (equal regexp "") - (concat (if match-beginning "^" "") regexp))))) + (let* ((code-prefix (pyim-scheme-code-prefix scheme)) + (regexp (mapconcat + (lambda (x) + (let ((code (concat (or code-prefix "") + (if first-equal + (substring x 0 1) + x)))) + (pyim-cregexp-build-xingma-regexp-from-words + (pyim-dcache-get code '(code2word))))) + imobj ""))) + (unless (equal regexp "") + (concat (if match-beginning "^" "") regexp)))) +(defun pyim-cregexp-build-xingma-regexp-from-words (words) + "根据 WORDS, 创建一个可以搜索这些 WORDS 的 regexp. + +比如:工, 恭恭敬敬 => [工恭][恭]?[敬]?[敬]? + +通过 \"[工恭][恭]?[敬]?[敬]?\" 可以搜索 \"工\" 和 \"恭恭敬敬\"." + (let ((n (apply #'max (mapcar #'length words))) + results) + (dotimes (i n) + (push (format "[%s]%s" + (mapconcat + (lambda (x) + (if (> i (- (length x) 1)) + "" + (char-to-string + (elt x i)))) + words "") + (if (> i 0) "?" "")) + results)) + (string-join (reverse results)))) ;; * Footer (provide 'pyim-cregexp) diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el index 7c25ff4385..90cbb6c48b 100644 --- a/tests/pyim-tests.el +++ b/tests/pyim-tests.el @@ -975,7 +975,10 @@ (should (equal (pyim-cregexp-build "aaaa'aaaa") "\\(?:\\(?:aaaa'\\|aaaa\\|[工恭]恭?敬?敬?\\)\\(?:aaaa\\|[工恭]恭?敬?敬?\\)\\)")) (should (equal (pyim-cregexp-create-1 "aaaa'aaaa" wubi) - "\\(?:aaaa'\\|aaaa\\|[工恭][恭]?[敬]?[敬]?\\)\\(?:aaaa\\|[工恭][恭]?[敬]?[敬]?\\)"))) + "\\(?:aaaa'\\|aaaa\\|[工恭][恭]?[敬]?[敬]?\\)\\(?:aaaa\\|[工恭][恭]?[敬]?[敬]?\\)")) + (should (equal (pyim-cregexp-build-xingma-regexp-from-words '("工" "恭恭敬敬")) + "[工恭][恭]?[敬]?[敬]?")) + ) (with-temp-buffer (insert "haha nihao")