branch: externals/pyim commit bf925142a53a36179ec42e7d93e389da0487dc8c Author: Feng Shu <tuma...@163.com> Commit: Feng Shu <tuma...@163.com>
拼音输入法可以搜索当前 buffer 来获取词条。 * tests/pyim-tests.el (pyim-tests-pyim-cregexp): test chinese-only argument. * pyim-cregexp.el (pyim-cregexp-build, pyim-cregexp-build-1): Add chinese-only argument. * pyim-common.el (pyim-time-limit-while): New macro. * pyim-candidates.el (pyim-candidates-create:quanpin): support async. (pyim-candidates-search-buffer): New function. --- pyim-candidates.el | 28 +++++++++++++++++++++++++++- pyim-common.el | 13 +++++++++++++ pyim-cregexp.el | 20 ++++++++++++-------- tests/pyim-tests.el | 6 ++++++ 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/pyim-candidates.el b/pyim-candidates.el index c9d1232..afe61de 100644 --- a/pyim-candidates.el +++ b/pyim-candidates.el @@ -91,7 +91,17 @@ IMOBJS 获得候选词条。" (defun pyim-candidates-create:quanpin (imobjs scheme-name &optional async) "`pyim-candidates-create' 处理全拼输入法的函数." - (unless async + (if async + ;; 使用当前的 entered 构建一个搜索中文的正则表达式, 然后使用这个正则表达式 + ;; 在当前 buffer 中搜索词条。 + (let ((str (pyim-entered-get))) + (if (< (length str) 1) + pyim-candidates + ;; NOTE: 让第一个词保持不变是不是合理,有待进一步的观察。 + `(,(car pyim-candidates) + ,@(pyim-candidates-search-buffer + (pyim-cregexp-build str 3 t)) + ,@(cdr pyim-candidates)))) ;; 这段代码主要实现以下功能:假如用户输入 nihaomazheshi, 但词库里面找不到对 ;; 应的词条,那么输入法自动用 nihaoma 和 zheshi 的第一个词条:"你好吗" 和 " ;; 这是" 连接成一个新的字符串 "你好吗这是" 做为第一个候选词。 @@ -110,6 +120,22 @@ IMOBJS 获得候选词条。" (append (pyim-subconcat (nreverse output) "") candidates)))) +(defun pyim-candidates-search-buffer (regexp) + "在当前 buffer 中使用 REGEXP 搜索词条。" + (save-excursion + (let ((start (current-time)) + words) + (goto-char (point-min)) + ;; Search after pos. 
+ (pyim-time-limit-while (and (not (input-pending-p)) + (re-search-forward regexp nil t)) + start 0.1 25 + (let ((match (match-string-no-properties 0))) + ;; NOTE: 单个汉字我觉得不值得收集。 + (when (>= (length match) 2) + (cl-pushnew match words :test #'equal)))) + words))) + (defun pyim-candidates-create-quanpin (imobjs scheme-name &optional fast-search) "`pyim-candidates-create:quanpin' 内部使用的函数。" (let (jianpin-words znabc-words personal-words common-words pinyin-chars-1 pinyin-chars-2) diff --git a/pyim-common.el b/pyim-common.el index 5158da3..0173c12 100644 --- a/pyim-common.el +++ b/pyim-common.el @@ -178,6 +178,19 @@ When CARE-FIRST-ONE is no-nil, ((a b c) (d e)) => (a d)." (append key nil)) unread-command-events)))) +;; Fork from `company-dabbrev--time-limit-while' in company-mode." +(defmacro pyim-time-limit-while (test start limit freq &rest body) + (declare (indent 3) (debug t)) + `(let ((pyim-time-limit-while-counter 0)) + (catch 'done + (while ,test + ,@body + (and ,limit + (= (cl-incf pyim-time-limit-while-counter) ,freq) + (setq pyim-time-limit-while-counter 0) + (> (float-time (time-since ,start)) ,limit) + (throw 'done 'pyim-time-out)))))) + ;; * Footer (provide 'pyim-common) diff --git a/pyim-cregexp.el b/pyim-cregexp.el index 56a5a1b..2a999a1 100644 --- a/pyim-cregexp.el +++ b/pyim-cregexp.el @@ -49,7 +49,7 @@ (max (min num 4) 1) 4)) -(defun pyim-cregexp-build (string &optional char-level-num) +(defun pyim-cregexp-build (string &optional char-level-num chinese-only) "根据 STRING 构建一个中文 regexp, 用于 \"拼音搜索汉字\". 比如:\"nihao\" -> \"[你呢...][好号...] 
\\| nihao\" @@ -60,6 +60,8 @@ CHAR-LEVEL-NUM 代表汉字常用级别,pyim 中根据汉字的使用频率, 如果这个参数设置为3, 那么代表在构建 regexp 是,只使用常用级别小于 等于3的汉字。 +如果 CHINESE-ONLY 为真,那么生成的 regexp 只能搜索汉字。 + 注意事项:如果生成的 regexp 太长,Emacs 无法处理,那么,这个命令 会抛弃一些不常用的汉字,重新生成,知道生成一个 Emacs 可以处理的 regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子的时候, @@ -77,7 +79,7 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子 (pyim-cregexp-build-from-rx (lambda (x) (if (stringp x) - (xr (pyim-cregexp-build-1 x num)) + (xr (pyim-cregexp-build-1 x num chinese-only)) x)) (xr string)))) string)) @@ -104,7 +106,7 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子 rx-form)) (_ (funcall fn rx-form)))) -(defun pyim-cregexp-build-1 (str &optional char-level-num) +(defun pyim-cregexp-build-1 (str &optional char-level-num chinese-only) (let* ((num (pyim-cregexp-char-level-num char-level-num)) (scheme-name (pyim-scheme-name)) (class (pyim-scheme-get-option scheme-name :class)) @@ -139,11 +141,13 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子 (delq nil regexp-list) "\\|"))) (regexp - (if (> (length regexp) 0) - (if (equal string string1) - (concat string "\\|" regexp) - (concat string "\\|" string1 "\\|" regexp)) - string))) + (if chinese-only + regexp + (if (> (length regexp) 0) + (if (equal string string1) + (concat string "\\|" regexp) + (concat string "\\|" string1 "\\|" regexp)) + string)))) (format "\\(?:%s\\)" regexp)))) lst ""))) diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el index da23ba2..ca552e4 100644 --- a/tests/pyim-tests.el +++ b/tests/pyim-tests.el @@ -429,6 +429,12 @@ (should (string-match-p regexp "你好")) (should (string-match-p regexp "哈哈你好吗"))) + (let ((regexp (pyim-cregexp-build "nihao" nil t))) + (should-not (string-match-p regexp "nihao")) + (should-not (string-match-p regexp "anihaob")) + (should (string-match-p regexp "你好")) + (should (string-match-p regexp "哈哈你好吗"))) + (let ((regexp (pyim-cregexp-build "beng"))) (should (string-match-p regexp "痭")) (should (string-match-p regexp "泵"))