branch: externals/pyim
commit a1e5df62ca06fa20126af633bf1ad2841350b5f4
Author: Feng Shu <tuma...@163.com>
Commit: Feng Shu <tuma...@163.com>

    Add duoyinzi adjust feature
---
 pyim-cstring.el     |  62 ++++---
 pyim-pymap.el       | 454 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 tests/pyim-tests.el |  52 +++---
 3 files changed, 513 insertions(+), 55 deletions(-)

diff --git a/pyim-cstring.el b/pyim-cstring.el
index e55a502090..b3e03e7565 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring.el
@@ -98,18 +98,13 @@ NUMBER 用于递归,表示子字符串在 CSTRING 中的位置。"
 ;; ** 中文字符串到拼音的转换工具
 ;;;###autoload
 (defun pyim-cstring-to-pinyin (string &optional shou-zi-mu separator
-                                      return-list ignore-duo-yin-zi adjust-duo-yin-zi)
+                                      return-list ignore-duo-yin-zi _)
   "将汉字字符串转换为对应的拼音字符串的工具.
 
 如果 SHOU-ZI-MU 设置为 t, 转换仅得到拼音首字母字符串。当
 RETURN-LIST 设置为 t 时,返回一个拼音列表,这个列表包含词条的一个
 或者多个拼音(词条包含多音字时);如果 IGNORE-DUO-YIN-ZI 设置为
-t, 遇到多音字时,只使用第一个拼音,其它拼音忽略;当
-ADJUST-DUO-YIN-Zi 设置为 t 时, `pyim-cstring-to-pinyin' 会使用 pyim 已
-安装的词库来校正多音字,但这个功能有一定的限制:
-
-1. pyim 普通词库中不存在的词条不能较正
-2. 多音字校正速度比较慢,实时转换会产生卡顿。
+t, 遇到多音字时,只使用第一个拼音,其它拼音忽略。
 
 BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结果会
 包含多余的连接符:比如: \"你=好\" --> \"ni-=-hao\""
@@ -127,22 +122,11 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
                     (pyim-cstring--partition string t)))
 
       ;; 通过排列组合的方式, 重排 pinyins-list。
-      ;; 比如:(("Hello") ("yin") ("hang" "xing")) -> (("Hello" "yin" "hang") ("Hello" "yin" "xing"))
+      ;; 比如:(("Hello") ("yin") ("hang")) -> (("Hello" "yin" "hang"))
       (setq pinyins-list
-            (pyim-permutate-list pinyins-list))
-
-      ;; 使用 pyim 的安装的词库来校正多音字。
-      ;; FIXME:如果 string 包含非中文的字符,那么多音字矫正将不起作用。
-      (when adjust-duo-yin-zi
-        (pyim-dcache-init-variables)
-        (dolist (pylist pinyins-list)
-          (let* ((py-str (mapconcat #'identity pylist "-"))
-                 (words-from-dicts
-                  (pyim-dcache-get py-str '(code2word))))
-            (when (member string words-from-dicts)
-              (push pylist pinyins-list-adjusted))))
-        (setq pinyins-list-adjusted
-              (nreverse pinyins-list-adjusted)))
+            (pyim-permutate-list
+             (pyim-cstring--adjust-duoyinzi
+              string pinyins-list)))
 
       ;; 返回拼音字符串或者拼音列表
       (let* ((pinyins-list
@@ -161,6 +145,40 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
             list
           (string-join list " "))))))
 
+(defun pyim-cstring--adjust-duoyinzi (word pinyins-list)
+  "根据 WORD 对 PINYINS-LIST 进行校正。
+
+比如:
+
+1. WORD:         人民银行
+2. PINYINS-LIST: ((\"ren\") (\"min\") (\"yin\") (\"hang\" \"xing\"))
+3. 输出结果为:  ((\"ren\") (\"min\") (\"yin\") (\"hang\"))
+
+这个函数依赖 `pyim-pymap-duoyinzi' 提供的多音字数据。"
+  (mapcar (lambda (pinyins)
+            (if (= (length pinyins) 1)
+                pinyins
+              (let ((py-adjusted
+                     ;; NOTE: 多音字校正规则:
+                     ;; 1. 首先通过在 WORD 中搜索多音字组成的词条来校正。
+                     ;; 2. 如果多音字组成的词条无法搜索到,就使用这个多音字最常用的读音,
+                     ;;    这样处理有可能校正错误,但大多数情况还是适用的。
+                     (or (cl-find-if
+                          (lambda (pinyin)
+                            (when-let* ((x (pyim-pymap-py2duoyinzi-get pinyin)))
+                              (string-match-p (string-join x "\\|") word)))
+                          pinyins)
+                         (cl-find-if
+                          (lambda (pinyin)
+                            (when-let* ((x (pyim-pymap-py2duoyinzi-get pinyin t)))
+                              (string-match-p (string-join x "\\|") word)))
+                          pinyins))))
+                ;; 如果多音字校正没有任何结果,就用校正前的信息。
+                (if py-adjusted
+                    (list py-adjusted)
+                  pinyins))))
+          pinyins-list))
+
 ;;;###autoload
 (defun pyim-cstring-to-pinyin-simple (string &optional shou-zi-mu separator return-list)
   "简化版的 `pyim-cstring-to-pinyin', 不处理多音字。"
diff --git a/pyim-pymap.el b/pyim-pymap.el
index 22f3ff98b4..c3a3f2367d 100644
--- a/pyim-pymap.el
+++ b/pyim-pymap.el
@@ -458,6 +458,421 @@
 
 但不是完全一致。")
 
+(defvar pyim-pymap-duoyinzi-chars
+  '(("ai" "艾")
+    ("ao" "坳")
+    ("ba" "扒")
+    ("bai" "柏")
+    ("bang" "磅")
+    ("bao" "薄" "曝" "暴" "堡" "剥" "刨")
+    ("beng" "蚌" "泵")
+    ("bi" "裨" "臂")
+    ("bian" "扁" "匾")
+    ("bu" "卜" "不")
+    ("ce" "侧")
+    ("ceng" "曾")
+    ("cha" "查")
+    ("chai" "拆")
+    ("chan" "禅" "掺")
+    ("chang" "厂")
+    ("chao" "嘲")
+    ("che" "辙" "车")
+    ("cheng" "乘")
+    ("chi" "尺")
+    ("chou" "臭" "仇")
+    ("chuo" "绰")
+    ("ci" "伺")
+    ("dai" "呆")
+    ("dan" "单")
+    ("dao" "叨")
+    ("di" "底")
+    ("ding" "丁")
+    ("dong" "洞")
+    ("du" "读" "度")
+    ("dun" "顿" "蹲" "沌" "敦")
+    ("duo" "堕")
+    ("fan" "繁" "番")
+    ("feng" "冯")
+    ("fo" "佛")
+    ("fou" "否")
+    ("ga" "旮")
+    ("gai" "盖")
+    ("gang" "扛")
+    ("gao" "镐")
+    ("ge" "革" "铬" "蛤")
+    ("gei" "给")
+    ("gong" "汞")
+    ("gou" "枸")
+    ("gu" "谷")
+    ("gua" "括" "呱")
+    ("guan" "莞")
+    ("guang" "广")
+    ("gui" "龟" "柜")
+    ("hang" "夯")
+    ("hao" "蒿")
+    ("he" "菏" "核" "和" "呵" "合")
+    ("hei" "嘿")
+    ("hong" "虹" "红")
+    ("hu" "鹄" "浒")
+    ("hua" "划")
+    ("huai" "徊")
+    ("huan" "还")
+    ("hun" "荤")
+    ("huo" "豁")
+    ("ji" "藉" "缉" "稽" "祭" "瘠" "亟")
+    ("jia" "贾" "家" "夹" "伽" "价")
+    ("jiao" "脚" "缴")
+    ("jie" "芥" "秸")
+    ("jing" "颈" "荆")
+    ("ju" "桔" "咀" "句")
+    ("jue" "jiao" "觉")
+    ("jun" "浚")
+    ("ka" "咖" "卡")
+    ("kai" "楷")
+    ("kan" "槛")
+    ("kang" "亢")
+    ("ke" "坷" "咳")
+    ("keng" "吭")
+    ("kui" "馈" "溃" "匮")
+    ("la" "蜡" "腊")
+    ("lao" "烙" "姥")
+    ("le" "勒")
+    ("lei" "肋")
+    ("lin" "赁")
+    ("ling" "棱")
+    ("liu" "六")
+    ("lu" "陆" "碌")
+    ("luo" "络")
+    ("lv" "缕" "绿")
+    ("mai" "脉" "埋")
+    ("mang" "氓")
+    ("mao" "冒")
+    ("me" "么")
+    ("mei" "酶" "没")
+    ("meng" "萌")
+    ("mi" "谜" "秘" "泌")
+    ("mian" "娩")
+    ("miu" "缪")
+    ("mo" "模" "摩" "抹")
+    ("mou" "牟")
+    ("mu" "沐")
+    ("na" "那" "哪" "呐")
+    ("nan" "南")
+    ("ne" "呢")
+    ("ni" "溺")
+    ("niao" "鸟" "尿")
+    ("nong" "弄")
+    ("nuo" "娜")
+    ("nve" "疟")
+    ("pan" "ban" "扳")
+    ("pang" "膀" "胖" "旁")
+    ("pao" "炮")
+    ("pi" "辟")
+    ("pin" "拚")
+    ("ping" "屏")
+    ("po" "魄" "迫")
+    ("pu" "璞" "莆" "脯" "瀑" "朴" "埔")
+    ("qi" "骑" "蹊" "栖" "期" "契" "其")
+    ("qian" "乾" "铅" "茜" "浅" "嵌" "堑")
+    ("qiao" "荞")
+    ("qie" "且" "茄")
+    ("qu" "区")
+    ("quan" "券")
+    ("que" "雀")
+    ("sai" "塞")
+    ("sao" "缫")
+    ("se" "色")
+    ("sha" "莎" "厦")
+    ("shan" "杉")
+    ("shang" "裳")
+    ("she" "赊" "蛇" "摄")
+    ("shen" "莘" "沈" "参" "什")
+    ("shi" "食" "适" "识" "石" "氏")
+    ("shu" "熟" "漱" "术" "数" "戌" "属")
+    ("shuai" "衰" "率")
+    ("shui" "谁")
+    ("shuo" "说")
+    ("si" "似")
+    ("su" "宿")
+    ("suo" "缩" "梭")
+    ("tang" "汤" "倘")
+    ("tao" "陶")
+    ("ti" "提")
+    ("ting" "铤" "烃")
+    ("tui" "褪")
+    ("tun" "屯" "囤")
+    ("tuo" "拓" "沱")
+    ("wan" "万")
+    ("wang" "亡")
+    ("wei" "蔚" "圩")
+    ("wo" "涡" "挝")
+    ("wu" "鹜" "无")
+    ("xi" "铣" "洗" "戏")
+    ("xia" "虾" "吓")
+    ("xian" "癣" "县")
+    ("xiang" "降" "巷")
+    ("xiao" "嚣" "削")
+    ("xie" "邪" "挟" "偕")
+    ("xu" "嘘" "许")
+    ("xue" "血")
+    ("xun" "寻")
+    ("yan" "腌" "咽")
+    ("yao" "钥" "窑" "侥")
+    ("ye" "曳" "叶")
+    ("yi" "遗")
+    ("yin" "殷")
+    ("yong" "甬")
+    ("you" "柚")
+    ("yu" "尉" "吁")
+    ("yuan" "员")
+    ("yue" "约")
+    ("yun" "蕴" "熨")
+    ("za" "咋")
+    ("zai" "仔")
+    ("zang" "赃" "臧")
+    ("zao" "皂")
+    ("ze" "择")
+    ("zha" "楂" "栅" "扎")
+    ("zhan" "辗" "斩")
+    ("zhao" "召")
+    ("zhe" "着")
+    ("zhi" "炙" "殖" "挚" "峙" "吱")
+    ("zhong" "种")
+    ("zhou" "粥")
+    ("zhu" "著")
+    ("zhua" "爪")
+    ("zhuai" "拽")
+    ("zhuan" "赚")
+    ("zhui" "缀" "椎")
+    ("zhuo" "琢")
+    ("zi" "兹")
+    ("zu" "卒"))
+  "多音字最常用的读音。")
+
+(defvar pyim-pymap-duoyinzi-words
+  '(("a" "阿姨" "阿富" "阿门" "阿拉" "阿林" "黑阿" "麦阿密" "鹿城阿岙" "阿福")
+    ("ao" "拗口" "违拗")
+    ("ai" "艾滋" "艾蒿" "未艾")
+    ("bang" "翅膀" "臂膀" "重磅" "磅秤" "黄泥磅店" "蛤蚌" "蚌壳" "河蚌" "鹬蚌" "珠蚌")
+    ("bai" "叔伯" "百万")
+    ("bao" "剥皮" "超薄" "薄脆" "薄板" "薄饼" "暴晒" "暴发" "暴雨" "暴力" "风暴" "暴露" "暴风" "汉堡" "古堡" "地堡" "城堡" "龍堡" "卡斯堡" "麻家堡" "麦芬堡" "汉堡" "麦得堡" "麦尔堡" "曝光" "瀑河")
+    ("beng" "蚌埠")
+    ("bi" "复辟" "臂章" "螳臂" "交臂" "前臂" "一臂" "奋臂" "膀臂" "臂膀" "秘鲁" "泌阳")
+    ("bing" "屏弃" "屏气" "屏除" "屏退" "屏息")
+    ("bian" "扁桃" "方便" "方便面" "便当" "便捷")
+    ("bo" "薄荷" "单薄" "伯仲" "伯乐" "伯劳" "伯父" "大伯" "老伯" "伯母" "黄伯" "伯爵" "停泊" "淡泊" "尼泊" "漂泊" "鸿波" "柏林")
+    ("bu" "大埔")
+    ("can" "参谋" "参事" "总参" "参数" "参议" "参观" "参拜" "参股")
+    ("cang" "埋藏" "藏头" "秘藏" "雪藏" "藏匿" "收藏" "馆藏" "矿藏" "隐藏" "蕴藏" "藏袍" "储藏" "窖藏" "藏龙" "藏胞" "冷藏" "珍藏" "私藏" "藏掖" "西藏" "藏书" "藏品" "伧俗" "龙藏寺")
+    ("cen" "参差")
+    ("ceng" "不曾" "似曾" "几曾" "何曾" "曾经" "曾几" "未曾" "噌的" "一声")
+    ("cha" "刹那" "宝刹" "一刹" "喳喳")
+    ("chai" "公差" "差役" "专差" "官差" "听差" "美差" "办差" "差事" "差使" "肥差" "当差" "钦差")
+    ("chan" "颤悠" "单于" "禅学" "班禅" "禅宗" "禅堂" "禅门" "禅机" "禅杖" "禅房" "禅师" "坐禅" "参禅" "禅院")
+    ("chang" "周长" "细长" "长发" "三长" "长河" "长袖" "长衫" "天长" "长短" "超长" "长沙" "长春" "长远" "长度" "长江" "长处" "长假" "长街" "长征" "全长" "长城" "波长" "身长" "长途" "长吁" "长虹" "长方")
+    ("chao" "朝阳" "朝阳区" "朝鲜" "朝廷" "王朝" "历朝" "解嘲" "讥嘲" "自嘲" "嘲笑" "嘲弄" "冷嘲" "嘲讽" "绰绰" "绰起" "绰家" "剿袭" "剿说")
+    ("che" "汽车" "停车场" "车车" "黑车" "车饰")
+    ("chen" "称职" "匀称" "称心" "相称" "对称")
+    ("cheng" "职称" "简称" "总称" "官称" "代称" "称号" "称谓" "昵称" "谦称" "全称" "名称" "噌吰")
+    ("chu" "六畜" "家畜" "耕畜" "畜生" "牲畜")
+    ("chui" "椎心")
+    ("chuan" "文传" "传媒" "传销" "传情" "真传" "祖传" "传闻" "传家" "秘传" "传单" "传说")
+    ("chi" "匙子" "茶匙" "羹匙" "汤匙" "尺度" "英尺" "咫尺" "尺码" "公尺" "卡尺" "米尺" "卷尺")
+    ("chong" "重庆" "重重")
+    ("chou" "汗臭" "臭氧" "口臭" "腋臭" "臭虫" "臭骂" "臭美" "酸臭" "腐臭" "臭气" "腥臭" "臭名" "遗臭" "恶臭" "臭豆" "狐臭" "臭味" "臭架")
+    ("chuang" "经幢")
+    ("chuo" "绰约" "阔绰" "绰号" "宽绰")
+    ("ci" "参差" "伺候" "龟兹")
+    ("cuan" "攒钱" "攒聚" "攒动")
+    ("cuo" "撮儿" "撮要" "撮合")
+    ("da" "大街" "沓子" "龙大" "大西洋" "大昌" "大圣" "福大" "黑大" "大华" "大包" "大厦")
+    ("dao" "叨唠" "絮叨" "叨念" "叨咕" "念叨" "唠叨" "叨叨" "磨叨")
+    ("dai" "大夫")
+    ("dan" "西单" "东单" "清单" "报单" "单利" "名单" "单姓" "单亲" "单线" "单科" "单间" "单挑" "单价" "单词" "子弹")
+    ("de" "似的" "总的" "中的" "别的")
+    ("deng" "澄清")
+    ("di" "怎的" "无的" "有的" "目的" "标的" "打的" "的确" "的当" "的士" "上地" "大地" "天地" "提防")
+    ("diao" "蓝调" "蓝调吧" "调调" "音调" "论调" "格调" "调令" "低调" "笔调" "基调" "强调" "声调" "滥调" "老调" "色调" "单调" "腔调" "跑调" "曲调" "步调" "语调" "主调" "情调")
+    ("du" "都会" "国都" "都城" "古都" "故都" "大都" "首都" "成都" "旧都" "都市" "龙都" "鼎都" "鹤都" "鹏都" "鸿都" "麦度" "态度" "读书" "法度" "宽度" "进度")
+    ("dou" "全都" "句读")
+    ("duo" "测度" "忖度" "揣度" "猜度")
+    ("dun" "粮囤")
+    ("e" "阿谀" "阿胶" "阿弥" "恶心")
+    ("fan" "番茄")
+    ("fo" "佛塔" "佛徒" "佛牙" "佛教")
+    ("fu" "仿佛" "果脯")
+    ("fou" "是否" "与否")
+    ("ga" "咖喱" "伽马")
+    ("gang" "扛鼎")
+    ("ge" "革命" "皮革" "鹰革" "蛤蚧" "文蛤" "蛤蜊" "咯吱" "咯噔" "咯咯")
+    ("geng" "脖颈")
+    ("gong" "女红")
+    ("gu" "布谷" "谷物" "谷地" "硅谷" "中鹄" "麦谷" "麓谷" "鹭谷")
+    ("gui" "龟山" "龟士" "龟博" "龟仔" "鹿龟" "龟汁" "龟苓" "龟顶")
+    ("gua" "挺括" "顶呱" "呱呱" "呱唧" "呱嗒")
+    ("guan" "纶巾" "东莞")
+    ("guang" "广州" "广东" "广播")
+    ("ha" "蛤蟆" "癞蛤" "虾蟆")
+    ("hai" "还是" "还有")
+    ("hao" "貉子" "貉绒")
+    ("hang" "总行" "分行" "支行" "行业" "排行" "行情" "央行" "商行" "外行" "银行" "商行" "酒行" "麻行" "琴行" "巷道")
+    ("he" "嘉和" "和睦" "亲和" "龙和" "之貉" "威吓" "恫吓" "恐吓" "鼎和" "锦和" "麒和苑" "合资" "鸿合")
+    ("heng" "道行")
+    ("hu" "鹄望" "鸿鹄" "鹄立")
+    ("huan" "鹂还")
+    ("hui" "会馆" "会展" "会所" "协会" "国会" "会堂")
+    ("hong" "红装" "红牌" "红木" "红人")
+    ("huo" "软和" "热和" "暖和")
+    ("ji" "病革" "给养" "自给" "给水" "薪给" "给予" "供给" "稽考" "稽查" "稽核" "滑稽" "稽留" "缉获" "缉查" "缉私" "缉捕" "狼藉" "奇数" "亟待" "亟须" "亟亟" "亟需" "诘屈" "荠菜")
+    ("jia" "雪茄" "瑜伽" "伽利略")
+    ("jian" "龙见")
+    ("jiang" "降温" "降低" "降旗" "下降" "倔强")
+    ("jiao" "嚼舌" "嚼子" "细嚼" "平角" "视角" "海角" "龙角" "鹿角" "围剿" "征剿" "饺子")
+    ("jie" "解放" "慰藉" "蕴藉" "盘诘" "诘难" "诘问" "反诘")
+    ("jin" "矜夸" "矜持" "骄矜" "自矜")
+    ("jing" "颈项" "颈椎" "引颈" "长颈" "宫颈" "瓶颈" "龙颈" "黑颈鹤" "鹿颈" "景色" "帝景" "劲松")
+    ("ju" "咀嚼" "桔汁")
+    ("jun" "平均" "鸿均")
+    ("juan" "棚圈" "圈养")
+    ("jv" "咀嚼" "趑趄")
+    ("jvan" "猪圈" "羊圈")
+    ("jue" "主角" "角色" "旦角" "女角" "丑角" "角力" "名角" "配角" "咀嚼" "直觉" "感觉" "错觉" "触觉" "幻觉")
+    ("jun" "龟裂")
+    ("jvn" "龟裂")
+    ("ka" "咖啡" "磁卡" "贺卡" "卡拉" "胸卡" "声卡" "卡片" "绿卡" "卡通" "网卡" "卡口" "龙卡" "咯痰" "咯血")
+    ("ke" "咳嗽" "干咳" "贝壳" "蚌壳" "外壳" "蛋壳" "脑壳" "弹壳")
+    ("keng" "吭声" "吭气" "吭哧")
+    ("kuai" "会计" "财会")
+    ("la" "癞痢")
+    ("lai" "癞疮" "癞子" "癞蛤" "癞皮")
+    ("lao" "积潦" "络子" "落枕" "落价" "麻粩")
+    ("le" "娱乐" "玩乐" "乐趣" "美乐" "乐缘")
+    ("lei" "勒紧")
+    ("lo" "然咯")
+    ("lou" "佝偻")
+    ("long" "里弄" "弄堂")
+    ("liao" "了解" "了结" "明了" "了得" "末了" "未了" "了如" "了如指掌" "潦草" "潦倒")
+    ("liu" "碌碡" "碌碌" "劳碌" "忙碌" "庸碌")
+    ("lu" "绿林")
+    ("luo" "络腮" "部落" "落花" "日落")
+    ("lv" "频率" "机率" "比率" "效率" "胜率" "概率" "汇率" "功率" "倍率" "绿叶" "淡绿" "绿色" "绿豆" "伛偻" "绿洲")
+    ("lun" "丙纶" "锦纶" "经纶" "涤纶")
+    ("man" "埋怨")
+    ("mai" "山脉" "动脉" "命脉" "筋脉" "脉象" "气脉" "脉动" "脉息" "脉络" "一脉" "经脉")
+    ("mang" "流氓")
+    ("me" "黛么")
+    ("meng" "群氓")
+    ("mo" "埋没" "隐没" "脉脉" "航模" "模糊" "男模" "楷模" "规模" "劳模" "模型" "模范" "模特" "名模")
+    ("mou" "绸缪")
+    ("mi" "秘密" "秘方" "奥秘" "神秘" "泌尿" "分泌")
+    ("miu" "谬论" "纰缪")
+    ("mu" "人模" "字模" "模板" "模样" "模具" "装模" "装模做样" "模子")
+    ("na" "安娜" "娜娜" "丽娜" "黛尔娜" "黛娜" "海娜" "黑娜" "黄丽娜" "麦香娜" "优娜" "麦娜" "麟娜")
+    ("nan" "南方" "湖南")
+    ("ne" "哪吒")
+    ("ni" "毛呢" "花呢" "呢绒" "线呢" "呢料" "呢子" "呢喃")
+    ("niao" "便溺")
+    ("niu" "执拗" "拗不")
+    ("nue" "疟疾")
+    ("nuo" "婀娜" "袅娜")
+    ("nv" "女人")
+    ("nve" "疟原" "疟蚊")
+    ("pai" "迫击" "迫击炮")
+    ("pang" "膀胱" "膀肿" "磅礴")
+    ("pi" "否极" "臧否" "龙陂" "黄陂")
+    ("pian" "扁舟" "便宜")
+    ("piao" "朴姓")
+    ("ping" "屏幕" "荧屏" "银屏")
+    ("po" "朴刀")
+    ("pu" "暴十" "一曝十寒" "里堡" "十里堡" "胸脯" "肉脯" "脯子" "杏脯" "简朴" "朴质" "古朴" "朴厚" "纯朴" "朴素" "诚朴" "俭朴" "朴实" "淳朴" "曝晒" "瀑布" "飞瀑" "黄埔")
+    ("qiu" "龟兹")
+    ("qi" "稽首" "缉鞋" "奇妙" "传奇" "亟来" "荸荠" "蹊跷" "林栖" "鹿奇" "鹭奇" "齐天大圣" "齐天")
+    ("qia" "卡脖" "卡子" "关卡" "卡壳" "哨卡" "边卡" "发卡")
+    ("qiao" "雀盲" "雀子" "地壳" "甲壳" "躯壳")
+    ("qian" "纤手" "拉纤" "纤夫" "纤绳")
+    ("qiang" "强颜" "强人" "自强" "强烈" "强风" "强大" "黎强" "麒强" "鹤强" "龚强")
+    ("qie" "茄子" "颠茄" "番茄" "趔趄")
+    ("qin" "亲和" "亲亲" "棘矜" "矜锄")
+    ("qing" "干亲" "亲家")
+    ("qu" "小区")
+    ("quan" "转圈" "钢圈" "圆圈" "罗圈" "弧圈" "垫圈" "小圈" "眼圈")
+    ("que" "麻雀" "鸟雀" "燕雀" "孔雀" "云雀" "雀巢、")
+    ("re" "般若")
+    ("sai" "麦迪塞姆" "活塞")
+    ("se" "堵塞" "搪塞" "茅塞" "闭塞" "鼻塞" "梗塞" "阻塞" "淤塞" "拥塞" "哽塞" "月色" "彩色" "特色" "深色" "声色" "黛色" "黛色" "黑色瞳" "色坊")
+    ("sha" "刹车" "急刹" "急刹车" "广厦" "大厦" "商厦" "鹰大厦")
+    ("shai" "色子")
+    ("shan" "姓单" "单县" "铁杉" "杉树" "封禅" "禅让" "黒杉")
+    ("shang" "衣裳")
+    ("she" "拾级" "折本")
+    ("shen" "沙参" "野参" "参王" "人参" "红参" "丹参" "山参" "海参" "刺参" "没什" "什么" "为什" "鹿参")
+    ("sheng" "野乘" "千乘" "史乘" "盛大" "鸿盛")
+    ("shi" "钥匙" "拾荒" "捡拾" "拾物" "家什" "什物" "什锦" "麻什" "麦什" "喀什" "牛什" "见识" "知识" "似的" "骨殖" "饮食" "副食" "石业" "石头" "石艺" "姓氏" "上栅" "下栅")
+    ("shuai" "表率" "率性" "率直" "率真" "粗率" "率领" "轻率" "直率" "草率" "大率" "坦率" "数字" "招数" "基数" "数码")
+    ("shuang" "泷水")
+    ("shu" "金属" "气数" "岁数" "度数" "数据" "级数" "数控" "数学" "参数" "次数" "正数" "代数" "实数" "系数" "分数" "辈数")
+    ("shui" "游说")
+    ("shuo" "数见" "数见不鲜" "传说" "听说" "妄说" "实说" "胡说" "评说" "分说" "小说")
+    ("si" "窥伺" "伺弄" "伺机" "疑似" "似是" "好似" "似曾" "形似" "酷似" "貌似" "似懂" "胜似" "恰似" "近似" "神似" "赛似" "看似" "活似" "强似" "似乎" "类似" "相似")
+    ("su" "宿主" "宿命" "归宿" "住宿" "借宿" "寄宿" "宿营" "夜宿" "露宿" "投宿" "宿舍" "名宿" "整宿" "食宿")
+    ("sui" "尿泡")
+    ("ta" "拓本" "拓片" "碑拓" "疲沓" "拖沓" "杂沓" "鸿塔")
+    ("tang" "鸭汤" "鸡汤")
+    ("tao" "叨扰" "叨光" "陶器")
+    ("tan" "弹性" "弹力" "反弹")
+    ("ti" "手提" "提速" "提意" "提前" "提早" "提升" "提议" "提款" "提婚" "提包" "耳提" "提供" "麦麦提")
+    ("tiao" "空调" "调教" "烹调" "调羹" "调料" "调皮" "调控" "调节" "调整" "调价" "谐调" "协调" "调色" "调侃" "调味" "失调" "调治" "调频" "调剂" "调停" "调休" "调解")
+    ("ting" "域町")
+    ("tui" "褪色" "褪毛")
+    ("tuo" "拓宽" "拓荒" "开拓" "落拓" "拓展" "拓印")
+    ("tun" "囤积" "囤聚")
+    ("wei" "响尾" "尾巴" "尾灯" "船尾" "追尾" "尾椎" "月尾" "燕尾" "尾数" "年尾" "岁尾" "鸢尾" "凤尾" "彗尾" "尾翼" "结尾" "遗之" "龙尾" "齐鑫尾" "麻尾" "麦度" "鹿尾")
+    ("wu" "可恶" "交恶" "好恶" "厌恶" "憎恶" "嫌恶" "痛恶" "深恶")
+    ("wan" "藤蔓" "枝蔓" "瓜蔓" "蔓儿" "莞尔" "百万")
+    ("xia" "虾仁" "青虾" "大虾" "虾皮" "对虾" "虾子" "虾酱" "虾兵" "虾米" "龙虾" "噶厦" "厦门" "吓唬" "吓人" "惊吓" "天虾" "龙虾" "皮皮虾" "麦虾")
+    ("xi" "栖栖" "关系" "星系" "水系" "系念" "体系" "联系" "系列" "菜系" "世系" "蹊径")
+    ("xiao" "学校" "切削" "削面" "刀削" "刮削")
+    ("xian" "纤细" "光纤" "纤巧" "纤柔" "纤小" "纤维" "纤瘦" "纤纤" "化纤" "纤秀" "棉纤" "纤尘")
+    ("xiang" "街巷" "僻巷" "巷子" "龙门巷" "六巷" "龙湾巷" "龙港巷" "龙泉巷" "龙巷" "龙妙巷" "龄巷" "齐家巷" "鼓楼巷" "鼓巷" "黎明巷" "麻子巷" "麻园巷" "麦子巷" "鹊巷")
+    ("xie" "解数" "出血" "采血" "换血" "血糊" "尿血" "淤血" "放血" "血晕" "血淋" "便血" "吐血" "咯血" "叶韵" "蝎子")
+    ("xiu" "铜臭" "乳臭" "成宿" "星宿")
+    ("xin" "鸿信")
+    ("xing" "深省" "省视" "内省" "不省人事" "省悟" "省察" "旅行" "例行" "行程" "行乐" "龙行" "人行" "流行" "先行" "行星" "品行" "发行" "行政" "风行" "龙行" "龍行" "麟行")
+    ("xu" "牧畜" "畜产" "畜牧" "畜养" "气吁" "喘吁" "吁吁" "麦埂圩")
+    ("xue" "削减" "削弱" "削瘦" "削球" "削平" "削价" "瘦削" "剥削" "削职" "删削" "削肩" "吸血")
+    ("xun" "荨麻" "荨麻疹")
+    ("yao" "发疟" "疟子" "约斤" "称约" "钥匙" "金钥")
+    ("yan" "吞咽" "咽气" "咽喉" "殷红" "腌制" "腌肉" "腌菜" "烟草" "名烟" "烟酒")
+    ("ye" "抽咽" "哽咽" "咽炎" "下咽" "呜咽" "幽咽" "悲咽" "绿叶" "叶子" "荷叶" "落叶" "菜叶" "红叶" "树叶" "枫叶" "茶叶" "鸿葉")
+    ("yi" "自艾" "惩艾" "后尾")
+    ("yin" "殷勤" "殷墟" "殷切" "殷鉴")
+    ("yo" "杭育")
+    ("yu" "谷浑" "呼吁" "吁请" "吁求" "体育" "教育" "育儿" "熨帖" "熨烫")
+    ("yun" "熨斗" "电熨斗")
+    ("yue" "乐音" "器乐" "乐律" "乐章" "音乐" "乐理" "民乐" "乐队" "声乐" "奏乐" "弦乐" "乐坛" "管乐" "配乐" "乐曲" "乐谱" "锁钥" "密钥" "乐团" "鼓乐社" "乐器" "栎阳" "约会")
+    ("zan" "积攒")
+    ("zang" "宝藏" "藏历" "藏文" "藏香" "藏语" "藏青" "藏族" "藏医" "藏戏" "藏药" "藏蓝")
+    ("ze" "择善")
+    ("zeng" "曾孙" "曾祖")
+    ("za" "绑扎" "结扎" "包扎" "捆扎")
+    ("zai" "牛仔" "龟仔" "龙仔" "鼻仔" "羊仔" "仔仔" "麻仔" "麵包仔" "麦旺仔" "鸿仔" "煲仔" "福仔")
+    ("zha" "马扎" "挣扎" "扎啤" "扎根" "扎手" "扎针" "扎花" "扎堆" "扎营" "扎实" "稳扎" "柞水" "麻扎镇" "麻扎乡" "栅栏")
+    ("zhai" "择菜")
+    ("zhan" "不粘" "粘贴" "粘连")
+    ("zhao" "朝朝" "明朝" "朝晖" "朝夕" "朝思" "有朝" "今朝" "朝气" "朝三" "朝秦" "朝霞" "鹰爪" "龙爪" "魔爪" "爪牙" "失着" "着数" "龙爪槐")
+    ("zhe" "破折" "打折" "曲折" "折冲" "存折" "折合" "折旧" "折纸" "骨折" "折返" "折价" "折算" "波折" "折扇" "对折" "不折" "折扣" "七折" "折中" "拙著" "要著" "著文" "新著" "本着" "对着")
+    ("zhi" "标识" "嘎吱" "咯吱" "吱扭" "吱吱" "繁殖" "增殖" "生殖" "殖民")
+    ("zhong" "重量" "鹏重")
+    ("zhu" "属意" "著录" "撰著" "名著" "专著" "著述" "著作" "显著" "昭著" "原著" "著名" "著书" "遗著" "论著" "著者" "编著" "卓著" "译著" "著称")
+    ("zhui" "椎骨" "尾椎" "椎间" "腰椎" "胸椎" "颈椎" "脊椎")
+    ("zhuo" "执著" "着装" "着落" "着意" "着力" "附着" "着笔" "胶着" "着手" "着重" "穿着" "衣着" "执着" "着眼" "着墨" "着实" "沉着" "着陆" "着想" "着色")
+    ("zhuang" "幢房" "一幢" "幢楼")
+    ("zi" "吱声" "来兹" "今兹" "仔细" "仔猪")
+    ("zu" "沐足" "足道")
+    ("zuo" "撮毛" "小撮" "柞绸" "柞蚕" "柞树" "柞木")
+    ("zui" "咀唇" "尖沙咀" "黄达咀" "黄土咀" "鹰咀"))
+  "多音字对应的词组。")
+
 (defvar pyim-pymap--py2cchar-cache1 nil
   "拼音查汉字功能需要的变量.
 
@@ -478,11 +893,18 @@
 
 类似: \"艾\" -> (\"yi\" \"ai\")")
 
+(defvar pyim-pymap--py2duoyinzi-cache1 nil
+  "汉字转拼音功能需要的变量")
+
+(defvar pyim-pymap--py2duoyinzi-cache2 nil
+  "汉字转拼音功能需要的变量")
+
 ;; ** "汉字 -> 拼音" 以及 "拼音 -> 汉字" 的转换函数
 (defun pyim-pymap-cache-create (&optional force)
   "创建 pymap 相关的 cache."
   (pyim-pymap--cchar2py-cache-create force)
-  (pyim-pymap--py2cchar-cache-create force))
+  (pyim-pymap--py2cchar-cache-create force)
+  (pyim-pymap--py2duoyinzi-cache-create force))
 
 (defun pyim-pymap--cchar2py-cache-create (&optional force)
   "Build pinyin cchar->pinyin hashtable from `pyim-pymap'.
@@ -533,6 +955,28 @@ If FORCE is non-nil, FORCE build."
             (puthash key (delete-dups `(,@orig-value ,@cchars))
                      pyim-pymap--py2cchar-cache3)))))))
 
+(defun pyim-pymap--py2duoyinzi-cache-create (&optional force)
+  "构建 pinyin 到多音字的缓存,如果 FORCE 设置为 t, 强制更新索引。"
+  (when (or force
+            (not pyim-pymap--py2duoyinzi-cache1)
+            (not pyim-pymap--py2duoyinzi-cache2))
+    (setq pyim-pymap--py2duoyinzi-cache1
+          (make-hash-table :size 50000 :test #'equal))
+    (setq pyim-pymap--py2duoyinzi-cache2
+          (make-hash-table :size 50000 :test #'equal))
+
+    (dolist (x pyim-pymap-duoyinzi-chars)
+      (let* ((py (car x))
+             (chars (delete-dups
+                     `(,@(cdr x) ,@(gethash py pyim-pymap--py2duoyinzi-cache1)))))
+        (puthash py chars pyim-pymap--py2duoyinzi-cache1)))
+
+    (dolist (x pyim-pymap-duoyinzi-words)
+      (let* ((py (car x))
+             (words (delete-dups
+                     `(,@(cdr x) ,@(gethash py pyim-pymap--py2duoyinzi-cache2)))))
+        (puthash py words pyim-pymap--py2duoyinzi-cache2)))))
+
 (defun pyim-pymap-py2cchar-get (pinyin &optional equal-match return-list include-seperator)
   "获取拼音与 PINYIN 想匹配的所有汉字.
 
@@ -578,6 +1022,14 @@ pyim 在特定的时候需要读取一个汉字的拼音,这个工作由此完
     (when (= (length key) 1)
       (gethash key pyim-pymap--cchar2py-cache))))
 
+(defun pyim-pymap-py2duoyinzi-get (pinyin &optional return-chars)
+  "获取与 PINYIN 想匹配的多音字(词)。"
+  (pyim-pymap--py2duoyinzi-cache-create)
+  (when (and pinyin (stringp pinyin))
+    (if return-chars
+        (gethash pinyin pyim-pymap--py2duoyinzi-cache1)
+      (gethash pinyin pyim-pymap--py2duoyinzi-cache2))))
+
 ;; * Footer
 (provide 'pyim-pymap)
 
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 502b7f26d1..8ba7446dd8 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -291,6 +291,10 @@
                  '("阿" "啊" "呵" "腌" "|" "嗄" "吖" "锕" "|" "|" "錒")))
   (should (equal (pyim-pymap-py2cchar-get "zhua" t)
                  '("抓挝爪||髽|膼撾檛簻")))
+  (should (equal (pyim-pymap-py2duoyinzi-get "ai")
+                 '("艾滋" "艾蒿" "未艾")))
+  (should (equal (pyim-pymap-py2duoyinzi-get "ai" t)
+                 '("艾")))
   (should (equal (mapcar (lambda (x)
                            (concat (substring x 0 1)
                                    (substring x -1)))
@@ -791,38 +795,22 @@
                    "我爱-北京-天安-门"))))
 
 (ert-deftest pyim-tests-pyim-cstring-to-pinyin ()
-  (let ((pyim-dhashcache-code2word (make-hash-table :test #'equal))
-        (str "银行很行"))
-    ;; Create code2word dcache.
-    (puthash "yin-hang-hen-xing" (list "银行很行") pyim-dhashcache-code2word)
-    ;; pyim-cstring-split-to-list
-    (should (equal (pyim-cstring-to-pinyin "银行很行")
-                   (concat "yinxinghenxing yinxinghenheng yinxinghenhang "
-                           "yinhenghenxing yinhenghenheng yinhenghenhang "
-                           "yinhanghenxing yinhanghenheng yinhanghenhang")))
-    (should (equal (pyim-cstring-to-pinyin "银行很行" t)
-                   "yxhx yxhh yxhh yhhx yhhh yhhh yhhx yhhh yhhh"))
-    (should (equal (pyim-cstring-to-pinyin "银行很行" nil "-")
-                   (concat "yin-xing-hen-xing yin-xing-hen-heng yin-xing-hen-hang "
-                           "yin-heng-hen-xing yin-heng-hen-heng yin-heng-hen-hang "
-                           "yin-hang-hen-xing yin-hang-hen-heng yin-hang-hen-hang")))
-    (should (equal (pyim-cstring-to-pinyin "银行很行" nil "-" t)
-                   '("yin-xing-hen-xing" "yin-xing-hen-heng" "yin-xing-hen-hang"
-                     "yin-heng-hen-xing" "yin-heng-hen-heng" "yin-heng-hen-hang"
-                     "yin-hang-hen-xing" "yin-hang-hen-heng" "yin-hang-hen-hang")))
-    (should (equal (pyim-cstring-to-pinyin "银行很行" nil "-" t t)
-                   '("yin-xing-hen-xing")))
-    (should (equal (pyim-cstring-to-pinyin "银行很行" nil "-" nil nil t)
-                   "yin-hang-hen-xing"))
-    (should (equal (pyim-cstring-to-pinyin "Hello 银行很行 Hi" nil "-" nil t)
-                   "Hello -yin-xing-hen-xing- Hi"))
-    ;; FIXME: 这个 test 是不合理的,不过暂时找不到简单的修复方式。
-    (should (equal (pyim-cstring-to-pinyin "Hello 银行很行 Hi" nil "-" nil nil t)
-                   (concat "Hello -yin-xing-hen-xing- Hi Hello -yin-xing-hen-heng- Hi "
-                           "Hello -yin-xing-hen-hang- Hi Hello -yin-heng-hen-xing- Hi "
-                           "Hello -yin-heng-hen-heng- Hi Hello -yin-heng-hen-hang- Hi "
-                           "Hello -yin-hang-hen-xing- Hi Hello -yin-hang-hen-heng- Hi "
-                           "Hello -yin-hang-hen-hang- Hi")))))
+  (should (equal (pyim-cstring--adjust-duoyinzi
+                  "银行传说" '(("yin") ("xing" "heng" "hang")
+                               ("zhuan" "chuan") ("yue" "shuo" "shui")))
+                 '(("yin") ("hang") ("chuan") ("shuo"))))
+
+  ;; pyim-cstring-split-to-list
+  (should (equal (pyim-cstring-to-pinyin "银行传说") "yinhangchuanshuo"))
+  (should (equal (pyim-cstring-to-pinyin "银行传说" t) "yhcs"))
+  (should (equal (pyim-cstring-to-pinyin "银行传说" nil "-") "yin-hang-chuan-shuo"))
+  (should (equal (pyim-cstring-to-pinyin "银行传说" nil "-" t) '("yin-hang-chuan-shuo")))
+  (should (equal (pyim-cstring-to-pinyin "银行传说" nil "-" t t) '("yin-hang-chuan-shuo")))
+  (should (equal (pyim-cstring-to-pinyin "Hello 银行传说 Hi" nil "-" nil t)
+                 "Hello -yin-hang-chuan-shuo- Hi"))
+  ;; FIXME: 这个 test 是不合理的,不过暂时找不到简单的修复方式。
+  (should (equal (pyim-cstring-to-pinyin "Hello 银行传说 Hi" nil "-" nil nil t)
+                 "Hello -yin-hang-chuan-shuo- Hi")))
 
 (ert-deftest pyim-tests-pyim-cstring-to-xingma ()
   (let ((pyim-dhashcache-word2code (make-hash-table :test #'equal))

Reply via email to