branch: externals/pyim
commit f096ead57432a32c8d85b2f714acc629f941ae71
Author: Feng Shu <tuma...@163.com>
Commit: Feng Shu <tuma...@163.com>
在创建个人词条时,使用 :noexport 对没有正确处理多音字的词条进行标记。 * pyim-dhashcache.el (pyim-dhashcache-export): Handle :noexport t. * pyim-process.el (pyim-process-create-word): Mark :noexport t when multi codes are found. * pyim-dhashcache.el (pyim-dhashcache-insert-word-into-icode2word): Do not use pyim-list-merge. * pyim-common.el (pyim-list-merge): Removed. --- pyim-common.el | 10 ---------- pyim-dhashcache.el | 25 ++++++++++++++++--------- pyim-process.el | 9 ++++++++- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/pyim-common.el b/pyim-common.el index 92f1d94..0452596 100644 --- a/pyim-common.el +++ b/pyim-common.el @@ -85,16 +85,6 @@ append (mapcar (lambda (l) (cons element l)) (pyim-permutate-list list-tail))))))) -(defun pyim-list-merge (a b) - "Join list A and B to a new list, then delete dups." - (let ((a (if (listp a) - a - (list a))) - (b (if (listp b) - b - (list b)))) - (delete-dups `(,@a ,@b)))) - (defun pyim-char-before-to-string (num) "得到光标前第 `num' 个字符,并将其转换为字符串。" (let* ((point (point)) diff --git a/pyim-dhashcache.el b/pyim-dhashcache.el index 2359c28..51a9bb5 100644 --- a/pyim-dhashcache.el +++ b/pyim-dhashcache.el @@ -259,11 +259,17 @@ DCACHE 是一个 code -> words 的 hashtable. (insert ";;; -*- coding: utf-8-unix -*-\n") (maphash (lambda (key value) - (insert (format "%s %s\n" - key - (if (listp value) - (mapconcat #'identity value " ") - value)))) + (let ((value (cl-remove-if + (lambda (x) + ;; 如果某个词条的 text 属性 :noexport 设置为 t, 在导出的 + ;; 时候自动忽略这个词条。 + (and (stringp x) + (get-text-property 0 :noexport x))) + (if (listp value) + value + (list value))))) + (when value + (insert (format "%s %s\n" key (mapconcat #'identity value " ")))))) dcache) (pyim-dcache-write-file file confirm))) @@ -404,10 +410,11 @@ code 对应的中文词条了。 (defun pyim-dhashcache-insert-word-into-icode2word (word pinyin prepend) "保存个人词到缓存." 
- (pyim-dhashcache-put pyim-dhashcache-icode2word - pinyin - (if prepend (pyim-list-merge word orig-value) - (pyim-list-merge orig-value word)))) + (pyim-dhashcache-put + pyim-dhashcache-icode2word pinyin + (if prepend + `(,word ,@(remove word orig-value)) + `(,@(remove word orig-value) ,word)))) (defun pyim-dhashcache-search-word-code (string) (gethash string pyim-dhashcache-word2code)) diff --git a/pyim-process.el b/pyim-process.el index 6758600..1c814eb 100644 --- a/pyim-process.el +++ b/pyim-process.el @@ -547,7 +547,14 @@ BUG:拼音无法有效地处理多音字。" (dolist (code codes) (unless (pyim-string-match-p "[^ a-z-]" code) (pyim-dcache-insert-icode2word - word (concat (or code-prefix "") code) prepend))) + (if (and (> (length word) 1) + (> (length codes) 1)) + ;; 如果 word 超过一个汉字,并且得到多个 codes,那么大概率说明没有 + ;; 正确处理多音字,这里设置一下 :noexport 属性,在导出词条的时候 + ;; 不导出这些带标记的词。 + (propertize word :noexport t) + (substring-no-properties word)) + (concat (or code-prefix "") code) prepend))) ;; TODO, 排序个人词库? ;; 返回 codes 和 word, 用于 message 命令。 (mapconcat (lambda (code)