branch: externals/pyim commit 7b96b826a2e0f5ea9267fc031c0a3431888a78e9 Author: Feng Shu <tuma...@163.com> Commit: Feng Shu <tuma...@163.com>
Add pyim-cstring-to-code-criteria * pyim.el (pyim-terminate-translation, pyim-create-pyim-word) (pyim-select-word): use pyim-cstring-to-code-criteria. * pyim-cstring.el (pyim-cstring-to-code-criteria): New variable. --- pyim-cstring.el | 19 ++++++++++++++----- pyim.el | 19 ++++++++++--------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/pyim-cstring.el b/pyim-cstring.el index 31a7f8d..7a8131e 100644 --- a/pyim-cstring.el +++ b/pyim-cstring.el @@ -33,6 +33,15 @@ "Chinese string tools for pyim." :group 'pyim) +(defvar pyim-cstring-to-code-criteria nil + "用于 code 选取的基准字符串。 + +`pyim-cstring-to-codes' 获取到一个词条的多个 codes 时,会将所有的 +codes 与这个字符串进行比较,然后选择一个最相似的 code 输出. + +这个字符串主要用于全拼和双拼输入法的多音字矫正,一般使用用户输入 +生成的 imobjs 转换得到,保留了用户原始输入的许多信息。") + ;; ** 中文字符串分词相关功能 (defun pyim-cstring-split-to-list (chinese-string &optional max-word-length delete-dups prefer-short-word) "一个基于 pyim 的中文分词函数。这个函数可以将中文字符 @@ -350,7 +359,7 @@ code-prefix)。当RETURN-LIST 设置为 t 时,返回一个 code list。" (substring s4 0 1)))))) (t nil)))) -(defun pyim-cstring-to-codes (string scheme-name &optional entered) +(defun pyim-cstring-to-codes (string scheme-name &optional criteria) "将 STRING 转换为 SCHEME-NAME 对应的 codes." (let ((class (pyim-scheme-get-option scheme-name :class))) (cond ((eq class 'xingma) @@ -358,15 +367,15 @@ code-prefix)。当RETURN-LIST 设置为 t 时,返回一个 code list。" ;;拼音使用了多音字校正 (t (let ((codes (pyim-cstring-to-pinyin string nil "-" t nil t)) codes-sorted) - (if (< (length entered) 1) + (if (< (length criteria) 1) codes - ;; 将 code 与用户输入 entered 比对,选取一个与用户输入最类似的 + ;; 将 所有 codes 与 criteria 字符串比对,选取相似度最高的一个 ;; code. 这种处理方式适合拼音输入法。 (setq codes-sorted (sort codes (lambda (a b) - (< (string-distance a entered) - (string-distance b entered))))) + (< (string-distance a criteria) + (string-distance b criteria))))) (list (car codes-sorted)))))))) ;; ** 获取光标处中文字符串或者中文词条的功能 diff --git a/pyim.el b/pyim.el index ae7c29a..168b33b 100644 --- a/pyim.el +++ b/pyim.el @@ -434,7 +434,7 @@ REFRESH-COMMON-DCACHE 已经废弃,不要再使用了。" (setq pyim-force-input-chinese nil) (pyim-page-hide) (pyim-entered-erase-buffer) - (setq pyim-entered-longest nil) + (setq pyim-cstring-to-code-criteria nil) (pyim-entered-refresh-timer-reset) (let* ((class (pyim-scheme-get-option (pyim-scheme-name) :class)) (func (intern (format "pyim-terminate-translation:%S" class)))) @@ -467,7 +467,7 @@ BUG:拼音无法有效地处理多音字。" (let* ((scheme-name (pyim-scheme-name)) (class (pyim-scheme-get-option scheme-name :class)) (code-prefix (pyim-scheme-get-option scheme-name :code-prefix)) - (codes (pyim-cstring-to-codes word scheme-name pyim-entered-longest))) + (codes (pyim-cstring-to-codes word scheme-name pyim-cstring-to-code-criteria))) ;; 保存对应词条的词频 (when (> (length word) 0) (pyim-dcache-update-iword2count word prepend wordcount-handler)) @@ -590,13 +590,14 @@ FILE 的格式与 `pyim-dcache-export' 生成的文件格式相同, (defun pyim-select-word () "从选词框中选择当前词条,然后删除该词条对应拼音。" (interactive) - ;; 记录用户在没有多次选词前的输入,用于多音字矫正。 - (setq pyim-entered-longest - (let ((entered (pyim-entered-get 'point-before))) - (if (> (length pyim-entered-longest) - (length entered)) - pyim-entered-longest - entered))) + (setq pyim-cstring-to-code-criteria + (let ((str (mapconcat #'identity + (pyim-codes-create (car pyim-imobjs) (pyim-scheme-name)) + ""))) + (if (> (length pyim-cstring-to-code-criteria) + (length str)) + pyim-cstring-to-code-criteria + str))) (if (null pyim-candidates) ; 如果没有选项,输入空格 (progn (pyim-outcome-handle 'last-char)