[elpa] externals/pyim bf92514: 拼音输入法可以搜索当前 buffer 来获取词条。

ELPA Syncer Fri, 10 Dec 2021 06:57:41 -0800

branch: externals/pyim
commit bf925142a53a36179ec42e7d93e389da0487dc8c
Author: Feng Shu <tuma...@163.com>
Commit: Feng Shu <tuma...@163.com>


    拼音输入法可以搜索当前 buffer 来获取词条。
    
        * tests/pyim-tests.el (pyim-tests-pyim-cregexp): test chinese-only 
argument.
    
        * pyim-cregexp.el (pyim-cregexp-build, pyim-cregexp-build-1): Add 
chinese-only argument.
    
        * pyim-common.el (pyim-time-limit-while): New macro.
    
        * pyim-candidates.el (pyim-candidates-create:quanpin): support async.
        (pyim-candidates-search-buffer): New function.
---
 pyim-candidates.el  | 28 +++++++++++++++++++++++++++-
 pyim-common.el      | 13 +++++++++++++
 pyim-cregexp.el     | 20 ++++++++++++--------
 tests/pyim-tests.el |  6 ++++++
 4 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/pyim-candidates.el b/pyim-candidates.el
index c9d1232..afe61de 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -91,7 +91,17 @@ IMOBJS 获得候选词条。"
 
 (defun pyim-candidates-create:quanpin (imobjs scheme-name &optional async)
   "`pyim-candidates-create' 处理全拼输入法的函数."
-  (unless async
+  (if async
+      ;; 使用当前的 entered 构建一个搜索中文的正则表达式, 然后使用这个正则表达式
+      ;; 在当前 buffer 中搜索词条。
+      (let ((str (pyim-entered-get)))
+        (if (< (length str) 1)
+            pyim-candidates
+          ;; NOTE: 让第一个词保持不变是不是合理，有待进一步的观察。
+          `(,(car pyim-candidates)
+            ,@(pyim-candidates-search-buffer
+               (pyim-cregexp-build str 3 t))
+            ,@(cdr pyim-candidates))))
     ;; 这段代码主要实现以下功能：假如用户输入 nihaomazheshi, 但词库里面找不到对
     ;; 应的词条，那么输入法自动用 nihaoma 和 zheshi 的第一个词条："你好吗" 和 "
     ;; 这是" 连接成一个新的字符串 "你好吗这是" 做为第一个候选词。
@@ -110,6 +120,22 @@ IMOBJS 获得候选词条。"
       (append (pyim-subconcat (nreverse output) "")
               candidates))))
 
+(defun pyim-candidates-search-buffer (regexp)
+  "Search the current buffer with REGEXP and return the list of distinct matches at least 2 characters long."
+  (save-excursion
+    (let ((start (current-time))
+          words)
+      (goto-char (point-min))
+      ;; Scan forward from the beginning of the buffer; stop when input is pending or, checked every 25 matches, once 0.1s has elapsed since START.
+      (pyim-time-limit-while (and (not (input-pending-p))
+                                  (re-search-forward regexp nil t))
+          start 0.1 25
+          (let ((match (match-string-no-properties 0)))
+            ;; NOTE: a single-character match is not considered worth collecting.
+            (when (>= (length match) 2)
+              (cl-pushnew match words :test #'equal))))
+      words)))
+
 (defun pyim-candidates-create-quanpin (imobjs scheme-name &optional 
fast-search)
   "`pyim-candidates-create:quanpin' 内部使用的函数。"
   (let (jianpin-words znabc-words personal-words common-words pinyin-chars-1 
pinyin-chars-2)
diff --git a/pyim-common.el b/pyim-common.el
index 5158da3..0173c12 100644
--- a/pyim-common.el
+++ b/pyim-common.el
@@ -178,6 +178,19 @@ When CARE-FIRST-ONE is no-nil, ((a b c) (d e)) => (a d)."
                           (append key nil))
                   unread-command-events))))
 
+;; Forked from `company-dabbrev--time-limit-while' in company-mode.  Run BODY while TEST holds; when LIMIT is non-nil, every FREQ iterations check the time since START and return `pyim-time-out' once it exceeds LIMIT seconds.
+(defmacro pyim-time-limit-while (test start limit freq &rest body)
+  (declare (indent 3) (debug t))
+  `(let ((pyim-time-limit-while-counter 0))
+     (catch 'done
+       (while ,test
+         ,@body
+         (and ,limit
+              (= (cl-incf pyim-time-limit-while-counter) ,freq)
+              (setq pyim-time-limit-while-counter 0)
+              (> (float-time (time-since ,start)) ,limit)
+              (throw 'done 'pyim-time-out))))))
+
 ;; * Footer
 (provide 'pyim-common)
 
diff --git a/pyim-cregexp.el b/pyim-cregexp.el
index 56a5a1b..2a999a1 100644
--- a/pyim-cregexp.el
+++ b/pyim-cregexp.el
@@ -49,7 +49,7 @@
       (max (min num 4) 1)
     4))
 
-(defun pyim-cregexp-build (string &optional char-level-num)
+(defun pyim-cregexp-build (string &optional char-level-num chinese-only)
   "根据 STRING 构建一个中文 regexp, 用于 \"拼音搜索汉字\".
 
 比如：\"nihao\" -> \"[你呢...][好号...] \\| nihao\"
@@ -60,6 +60,8 @@ CHAR-LEVEL-NUM 代表汉字常用级别，pyim 中根据汉字的使用频率，
 如果这个参数设置为3, 那么代表在构建 regexp 是，只使用常用级别小于
 等于3的汉字。
 
+如果 CHINESE-ONLY 为真，那么生成的 regexp 只能搜索汉字。
+
 注意事项：如果生成的 regexp 太长，Emacs 无法处理，那么，这个命令
 会抛弃一些不常用的汉字，重新生成，知道生成一个 Emacs 可以处理的
 regexp, 所以搜索单字的时候一般可以搜到生僻字，但搜索句子的时候，
@@ -77,7 +79,7 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字，但搜索句子
                      (pyim-cregexp-build-from-rx
                       (lambda (x)
                         (if (stringp x)
-                            (xr (pyim-cregexp-build-1 x num))
+                            (xr (pyim-cregexp-build-1 x num chinese-only))
                           x))
                       (xr string))))
                   string))
@@ -104,7 +106,7 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字，但搜索句子
              rx-form))
     (_ (funcall fn rx-form))))
 
-(defun pyim-cregexp-build-1 (str &optional char-level-num)
+(defun pyim-cregexp-build-1 (str &optional char-level-num chinese-only)
   (let* ((num (pyim-cregexp-char-level-num char-level-num))
          (scheme-name (pyim-scheme-name))
          (class (pyim-scheme-get-option scheme-name :class))
@@ -139,11 +141,13 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字，但搜索句子
                               (delq nil regexp-list)
                               "\\|")))
                 (regexp
-                 (if (> (length regexp) 0)
-                     (if (equal string string1)
-                         (concat string "\\|" regexp)
-                       (concat string "\\|" string1 "\\|" regexp))
-                   string)))
+                 (if chinese-only
+                     regexp
+                   (if (> (length regexp) 0)
+                       (if (equal string string1)
+                           (concat string "\\|" regexp)
+                         (concat string "\\|" string1 "\\|" regexp))
+                     string))))
            (format "\\(?:%s\\)" regexp))))
      lst "")))
 
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index da23ba2..ca552e4 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -429,6 +429,12 @@
     (should (string-match-p regexp "你好"))
     (should (string-match-p regexp "哈哈你好吗")))
 
+  (let ((regexp (pyim-cregexp-build "nihao" nil t)))
+    (should-not (string-match-p regexp "nihao"))
+    (should-not (string-match-p regexp "anihaob"))
+    (should (string-match-p regexp "你好"))
+    (should (string-match-p regexp "哈哈你好吗")))
+
   (let ((regexp (pyim-cregexp-build "beng")))
     (should (string-match-p regexp "痭"))
     (should (string-match-p regexp "泵"))

[elpa] externals/pyim bf92514: 拼音输入法可以搜索当前 buffer 来获取词条。

Reply via email to