I still advocate solving only MY problem, with a simple change: https://bugs.debian.org/cgi-bin/bugreport.cgi?att=2;bug=929923;filename=929923.patch;msg=22
However, I also considered a complex change: generate debian/*.links automatically from */dictionaries.xcu. A proof-of-concept is attached. If this is interesting, I can look at merging it into debian/helper.py. If this is not interesting, I'm happy to just forget about it :-)
#!/usr/bin/python3
"""Create symlinks to simulate missing dictionaries.xcu.

LibreOffice provides spelling/hyphenation/thesaurus dictionaries for
different language varieties (xx_YY).  When another variety is similar,
and no dedicated dictionary is available, they are aliased together.

LibreOffice defines these aliases in a "dictionaries.xcu" file.
For example, de_AT has its own hyphenation dictionary, but
re-uses de_DE's thesaurus dictionary.

  https://sources.debian.org/src/libreoffice-dictionaries/1:6.3.0-1/dictionaries/de/dictionaries.xcu/#L46
  https://sources.debian.org/src/libreoffice-dictionaries/1:6.3.0-1/dictionaries/de/dictionaries.xcu/#L80

Debian does not ship dictionaries.xcu files because

  1. only LibreOffice understands them, but
     other packages use the dictionaries themselves.

  2. Debian packages the spelling/hyphenation/thesaurus dictionaries
     separately, but dictionaries.xcu assumes they are packaged together.

If your locale is set to the original language
(e.g. LANG=de_DE for th_de_DE_v2.dat), this Just Works, because of
fallback behaviour in the individual apps (including LibreOffice).

If your locale is set to the aliased language
(e.g. LANG=de_AT for th_de_DE_v2.dat), a symlink is needed to help
the app "see" the dictionary.

Prior to this script, those symlinks were created on an ad-hoc basis.
This script tries to fully automate that process, so that

  1. there is less work for the Debian maintainer; and
  2. more consistent behaviour between Debian and upstream.

---Trent W. Buck, Aug 2019, https://bugs.debian.org/929923
"""

import sys
import glob
import pprint
import types
import re
import typing

import lxml.etree

# Directory substituted for the "%origin%" placeholder, keyed by the XCU
# "Format" value of the dictionary entry.
_ORIGIN_DIRS = {
    'DICT_SPELL': '/usr/share/hunspell',
    'DICT_HYPH': '/usr/share/hyphen',
    'DICT_THES': '/usr/share/mythes',
}

# Everything in a dictionary path that comes *before* the locale name.
_PREFIX_RE = re.compile(
    r'('
    r'/usr/share/hunspell/|'
    r'/usr/share/hyphen/hyph_|'
    r'/usr/share/mythes/(?:th|thes|thesaurus)_'
    r').*')

# Everything in a dictionary path that comes *after* the locale name.
# NOTE(review): the leading '.*' is greedy, so the optional '_v2' is never
# captured and generated link names lose it (th_de_DE_v2.dat is linked as
# th_de_AT.dat).  Behaviour kept as-is; confirm whether that is intended.
_SUFFIX_RE = re.compile(
    r'.*'
    r'((?:_v2)?\.(?:dic|aff|dat|idx))')


def _split_dictionary_path(path: str) -> typing.Optional[typing.Tuple[str, str]]:
    """Return the (prefix, suffix) surrounding the locale in *path*.

    Returns None when *path* does not look like one of the hunspell,
    hyphen or mythes dictionary paths this script knows how to rename.
    """
    prefix_match = _PREFIX_RE.fullmatch(path)
    suffix_match = _SUFFIX_RE.fullmatch(path)
    if prefix_match is None or suffix_match is None:
        return None
    return prefix_match.group(1), suffix_match.group(1)


def main() -> None:
    """Print one tab-separated ``dh_link`` line per missing locale alias.

    For every dictionary entry returned by xcu2dicts(), every file of the
    entry is paired with every locale of the entry.  When the file name
    that locale would need differs from the file that exists, a line

        <TAB>dh_link<TAB>existing-file<TAB>link-name<TAB># locale

    is written to stdout.  Files whose path is not recognised are
    reported on stderr and skipped.

    Raises:
        KeyError: if an entry has a format other than DICT_SPELL,
            DICT_HYPH or DICT_THES.
    """
    for d in xcu2dicts():
        origin = _ORIGIN_DIRS[d.format]
        # d.files and d.locales are unordered sets.  Every file must be
        # linked for every locale (zip() would pair them arbitrarily and
        # drop the surplus), and sorting keeps the output reproducible.
        for location in sorted(d.files):
            # Expand the "%origin%" variable to whatever it should be.
            symlink_dst_path = location.replace('%origin%', origin)
            affixes = _split_dictionary_path(symlink_dst_path)
            if affixes is None:
                print('W: skipping unrecognised dictionary path:',
                      symlink_dst_path,
                      file=sys.stderr)
                continue
            prefix, suffix = affixes
            for locale in sorted(d.locales):
                symlink_src_path = (
                    prefix +
                    IETF_locale_to_glibc_locale(locale) +
                    suffix)
                # FIXME: needs to use f'-p{package}', like helper.py:generate_installs().
                if symlink_dst_path != symlink_src_path:
                    print('',       # indent for make
                          'dh_link',
                          symlink_dst_path,
                          symlink_src_path,
                          '# ' + locale,  # comment
                          sep='\t')


# The upstream XCU use RFC 5646 notation (kmr-Latn-TR).
# The upstream dictionaries aren't completely consistent, but mostly use glibc notation (ks_IN@devanagari).
# libreoffice-dictionaries/debian/helper.py has a hand-written dict instead of this bodgy regex-replacement.
def IETF_locale_to_glibc_locale(lo_locale: str) -> str:
    """Convert an RFC 5646 language tag to a glibc-style locale name.

    Three regex substitutions are applied in order:

      * a ``-Latn`` script subtag becomes a trailing ``@latin`` modifier;
      * a ``-valencia`` variant subtag becomes ``@valencia``;
      * the first remaining ``-`` becomes ``_`` (later hyphens are kept).

    Examples:
        de-AT          -> de_AT
        kmr-Latn-TR    -> kmr_TR@latin
        ca-ES-valencia -> ca_ES@valencia
        gl             -> gl
    """
    s = lo_locale
    # Change -Latn- to @latin (YUK!)
    s = re.sub(r'(.+)-Latn(-.+)?', r'\1\2@latin', s)
    # Change -valencia to @valencia (YUK!)
    s = re.sub(r'(.+)-valencia', r'\1@valencia', s)
    # Change xx-YY to xx_YY
    s = re.sub(r'([^-]+)-(.+)', r'\1_\2', s)
    return s


# Scrape key/value pairs from the XCUs.
# Example output:
#   [namespace(files={'%origin%/af_ZA.aff', '%origin%/af_ZA.dic'},
#              format='DICT_SPELL',
#              locales={'af-NA', 'af-ZA'}),
#    namespace(files={'%origin%/hyph_af_ZA.dic'},
#              format='DICT_HYPH',
#              locales={'af-NA', 'af-ZA'})]
def xcu2dicts() -> list:
    """Collect the dictionary entries declared in the upstream XCU files.

    Every file matching ``dictionaries/*/dictionaries.xcu`` (relative to
    the current working directory) is parsed.  Each child node of a
    ``Dictionaries`` node yields one ``types.SimpleNamespace`` with:

      format  -- the text of the entry's "Format" property;
      files   -- set of whitespace-separated words from "Locations";
      locales -- set of whitespace-separated words from "Locales".

    Raises:
        ValueError: if an entry's "Format" property does not yield
            exactly one text value.
    """
    def words(entry, expression, nsmap) -> set:
        # Gather the whitespace-separated words of every matching text node.
        found = set()
        for text in entry.xpath(expression, namespaces=nsmap):
            found.update(text.split())
        return found

    entries = []
    for xcu_path in glob.glob('dictionaries/*/dictionaries.xcu'):
        tree = lxml.etree.parse(xcu_path)
        # The XPath expressions use the "oor" prefix, so reuse the
        # namespace map declared on the document's root element.
        nsmap = tree.getroot().nsmap
        for entry in tree.xpath('//node[@oor:name="Dictionaries"]/node',
                                namespaces=nsmap):
            # One-element unpacking: fails loudly unless there is exactly
            # one "Format" value.
            (dict_format,) = entry.xpath(
                './prop[@oor:name="Format"]/value/text()',
                namespaces=nsmap)
            file_words = words(
                entry, './prop[@oor:name="Locations"]/value/text()', nsmap)
            locale_words = words(
                entry, './prop[@oor:name="Locales"]/value/text()', nsmap)
            entries.append(types.SimpleNamespace(
                format=dict_format,
                files=file_words,
                locales=locale_words))
    return entries


if __name__ == '__main__':
    main()
dh_link /usr/share/hunspell/af_ZA.dic /usr/share/hunspell/af_NA.dic # af-NA dh_link /usr/share/hunspell/en_GB.dic /usr/share/hunspell/en_GH.dic # en-GH dh_link /usr/share/hunspell/en_GB.aff /usr/share/hunspell/en_BS.aff # en-BS dh_link /usr/share/hunspell/en_ZA.aff /usr/share/hunspell/en_ZW.aff # en-ZW dh_link /usr/share/hunspell/en_ZA.dic /usr/share/hunspell/en_NA.dic # en-NA dh_link /usr/share/hunspell/en_US.dic /usr/share/hunspell/en_PH.dic # en-PH dh_link /usr/share/hyphen/hyph_en_GB.dic /usr/share/hyphen/hyph_en_AU.dic # en-AU dh_link /usr/share/hyphen/hyph_en_US.dic /usr/share/hyphen/hyph_en_PH.dic # en-PH dh_link /usr/share/mythes/th_en_US_v2.dat /usr/share/mythes/th_en_AU.dat # en-AU dh_link /usr/share/hunspell/gl_ES.dic /usr/share/hunspell/gl.dic # gl dh_link /usr/share/hyphen/hyph_gl.dic /usr/share/hyphen/hyph_gl_ES.dic # gl-ES dh_link /usr/share/mythes/thesaurus_gl.idx /usr/share/mythes/thesaurus_gl_ES.idx # gl-ES dh_link /usr/share/hunspell/fr.dic /usr/share/hunspell/fr_FR.dic # fr-FR dh_link /usr/share/hunspell/fr.aff /usr/share/hunspell/fr_CH.aff # fr-CH dh_link /usr/share/hyphen/hyph_fr.dic /usr/share/hyphen/hyph_fr_FR.dic # fr-FR dh_link /usr/share/mythes/thes_fr.dat /usr/share/mythes/thes_fr_FR.dat # fr-FR dh_link /usr/share/mythes/thes_fr.idx /usr/share/mythes/thes_fr_CH.idx # fr-CH dh_link /usr/share/hunspell/lt.aff /usr/share/hunspell/lt_LT.aff # lt-LT dh_link /usr/share/hyphen/hyph_lt.dic /usr/share/hyphen/hyph_lt_LT.dic # lt-LT dh_link /usr/share/hunspell/ar.dic /usr/share/hunspell/ar_EG.dic # ar-EG dh_link /usr/share/hunspell/ar.aff /usr/share/hunspell/ar_DZ.aff # ar-DZ dh_link /usr/share/mythes/th_ar.idx /usr/share/mythes/th_ar_EG.idx # ar-EG dh_link /usr/share/mythes/th_ar.dat /usr/share/mythes/th_ar_DZ.dat # ar-DZ dh_link /usr/share/hunspell/pt_PT.aff /usr/share/hunspell/pt_AO.aff # pt-AO dh_link /usr/share/mythes/th_pt_PT_v2.dat /usr/share/mythes/th_pt_PT.dat # pt-PT dh_link /usr/share/hunspell/bn_BD.dic /usr/share/hunspell/bn_IN.dic # 
bn-IN dh_link /usr/share/hunspell/bs_BA.aff /usr/share/hunspell/bs.aff # bs dh_link /usr/share/mythes/th_ro_RO_v2.dat /usr/share/mythes/th_ro_RO.dat # ro-RO dh_link /usr/share/hunspell/tr_TR.aff /usr/share/hunspell/tr.aff # tr dh_link /usr/share/hunspell/an_ES.aff /usr/share/hunspell/an.aff # an dh_link /usr/share/hyphen/hyph_es_ANY.dic /usr/share/hyphen/hyph_es_NI.dic # es-NI dh_link /usr/share/mythes/th_es_ANY_v2.dat /usr/share/mythes/th_es_NI.dat # es-NI dh_link /usr/share/mythes/th_es_ANY_v2.idx /usr/share/mythes/th_es_CO.idx # es-CO dh_link /usr/share/hunspell/nl_NL.dic /usr/share/hunspell/nl_BE.dic # nl-BE dh_link /usr/share/hyphen/hyph_nl_NL.dic /usr/share/hyphen/hyph_nl_BE.dic # nl-BE dh_link /usr/share/hunspell/is.dic /usr/share/hunspell/is_IS.dic # is-IS dh_link /usr/share/mythes/th_is.idx /usr/share/mythes/th_is_IS.idx # is-IS dh_link /usr/share/hunspell/bo.aff /usr/share/hunspell/bo_CN.aff # bo-CN dh_link /usr/share/mythes/th_bg_BG_v2.dat /usr/share/mythes/th_bg_BG.dat # bg-BG dh_link /usr/share/mythes/th_lv_LV_v2.dat /usr/share/mythes/th_lv_LV.dat # lv-LV dh_link /usr/share/hunspell/ca.aff /usr/share/hunspell/ca_ES.aff # ca-ES dh_link /usr/share/hunspell/ca.dic /usr/share/hunspell/ca_IT.dic # ca-IT dh_link /usr/share/hunspell/ca-valencia.dic /usr/share/hunspell/ca_ES@valencia.dic # ca-ES-valencia dh_link /usr/share/hyphen/hyph_ca.dic /usr/share/hyphen/hyph_ca_FR.dic # ca-FR dh_link /usr/share/mythes/th_ca_ES_v3.idx /usr/share/mythes/th_ca_FR.idx # ca-FR dh_link /usr/share/mythes/th_ca_ES_v3.dat /usr/share/mythes/th_ca_ES@valencia.dat # ca-ES-valencia dh_link /usr/share/hunspell/te_IN.dic /usr/share/hunspell/te.dic # te dh_link /usr/share/mythes/th_pl_PL_v2.dat /usr/share/mythes/th_pl_PL.dat # pl-PL dh_link /usr/share/mythes/th_id_ID_v2.idx /usr/share/mythes/th_id_ID.idx # id-ID dh_link /usr/share/hunspell/sr.aff /usr/share/hunspell/sr_RS.aff # sr-RS dh_link /usr/share/hunspell/sr.dic /usr/share/hunspell/sr_CS.dic # sr-CS dh_link
/usr/share/hunspell/sr-Latn.dic /usr/share/hunspell/sr_CS@latin.dic # sr-Latn-CS dh_link /usr/share/hunspell/sr-Latn.aff /usr/share/hunspell/sr_RS@latin.aff # sr-Latn-RS dh_link /usr/share/hyphen/hyph_sr.dic /usr/share/hyphen/hyph_sr_RS.dic # sr-RS dh_link /usr/share/hyphen/hyph_sr-Latn.dic /usr/share/hyphen/hyph_sr_CS@latin.dic # sr-Latn-CS dh_link /usr/share/hyphen/hyph_sv.dic /usr/share/hyphen/hyph_sv_SE.dic # sv-SE dh_link /usr/share/mythes/th_hu_HU_v2.idx /usr/share/mythes/th_hu_HU.idx # hu-HU dh_link /usr/share/mythes/th_ru_RU_v2.idx /usr/share/mythes/th_ru_RU.idx # ru-RU dh_link /usr/share/mythes/th_it_IT_v2.dat /usr/share/mythes/th_it_IT.dat # it-IT dh_link /usr/share/mythes/th_nb_NO_v2.dat /usr/share/mythes/th_nb_NO.dat # nb-NO dh_link /usr/share/mythes/th_nn_NO_v2.dat /usr/share/mythes/th_nn_NO.dat # nn-NO dh_link /usr/share/mythes/th_sk_SK_v2.idx /usr/share/mythes/th_sk_SK.idx # sk-SK dh_link /usr/share/hunspell/gug.aff /usr/share/hunspell/gug_PY.aff # gug-PY dh_link /usr/share/hunspell/kmr_Latn.aff /usr/share/hunspell/kmr_SY@latin.aff # kmr-Latn-SY dh_link /usr/share/hunspell/kmr_Latn.dic /usr/share/hunspell/kmr_TR@latin.dic # kmr-Latn-TR dh_link /usr/share/hunspell/de_AT_frami.aff /usr/share/hunspell/de_AT.aff # de-AT dh_link /usr/share/hunspell/de_CH_frami.dic /usr/share/hunspell/de_CH.dic # de-CH dh_link /usr/share/hunspell/de_DE_frami.aff /usr/share/hunspell/de_DE.aff # de-DE dh_link /usr/share/mythes/th_de_DE_v2.dat /usr/share/mythes/th_de_AT.dat # de-AT dh_link /usr/share/mythes/th_de_CH_v2.dat /usr/share/mythes/th_de_CH.dat # de-CH dh_link /usr/share/mythes/th_sl_SI_v2.dat /usr/share/mythes/th_sl_SI.dat # sl-SI dh_link /usr/share/mythes/th_ne_NP_v2.dat /usr/share/mythes/th_ne_NP.dat # ne-NP