I still advocate solving only MY problem, with a simple change:

    
https://bugs.debian.org/cgi-bin/bugreport.cgi?att=2;bug=929923;filename=929923.patch;msg=22

However, I also considered a complex change:
generate debian/*.links automatically from */dictionaries.xcu.

A proof-of-concept is attached.
If this is interesting, I can look at merging it into debian/helper.py.
If this is not interesting, I'm happy to just forget about it :-)
#!/usr/bin/python3

"""Create symlinks to simulate missing dictionaries.xcu.

LibreOffice provides spelling/hyphenation/thesaurus dictionaries for different language varieties (xx_YY).
When another variety is similar, and no dedicated dictionary is available, they are aliased together.
LibreOffice defines these aliases in a "dictionaries.xcu" file.

For example, de_AT has its own hyphenation dictionary, but re-uses de_DE's thesaurus dictionary.
https://sources.debian.org/src/libreoffice-dictionaries/1:6.3.0-1/dictionaries/de/dictionaries.xcu/#L46
https://sources.debian.org/src/libreoffice-dictionaries/1:6.3.0-1/dictionaries/de/dictionaries.xcu/#L80

Debian does not ship dictionaries.xcu files because

 1. only LibreOffice understands them, but
    other packages use the dictionaries themselves.

 2. Debian packages the spelling/hyphenation/thesaurus dictionaries separately, but
    dictionaries.xcu assumes they are packaged together.

If your locale is set to the original language (e.g. LANG=de_DE for
th_de_DE_v2.dat), this Just Works, because of fallback behaviour in
the individual apps (including LibreOffice).

If your locale is set to the aliased language (e.g. LANG=de_AT for
th_de_DE_v2.dat), a symlink is needed to help the app "see" the dictionary.

Prior to this script, those symlinks were created on an ad-hoc basis.
This script tries to fully automate that process, so that

 1. there is less work for the Debian maintainer; and
 2. behaviour is more consistent between Debian and upstream.

---Trent W. Buck, Aug 2019, https://bugs.debian.org/929923
"""

import sys
import glob
import pprint
import types
import re

import lxml.etree


def main() -> None:
    """Print a ``dh_link`` command for every dictionary alias found upstream.

    For each dictionary entry returned by ``xcu2dicts()``, every file of the
    entry is combined with every locale of the entry.  The file's
    ``%origin%`` placeholder is expanded to the Debian install directory for
    the entry's format, and the link name is built from the file's
    directory/prefix, the glibc-style locale name, and the file's extension.
    A line is printed only when the link name differs from the real file.

    Each output line is tab-separated and starts with an empty field, so it
    can be pasted into a make recipe.

    Raises:
        KeyError: if an entry's format is not DICT_SPELL, DICT_HYPH or
            DICT_THES.
        AttributeError: if a file path does not match the expected
            prefix/suffix layout (``re.fullmatch`` returns ``None``).
    """
    origin_dirs = {'DICT_SPELL': '/usr/share/hunspell',
                   'DICT_HYPH': '/usr/share/hyphen',
                   'DICT_THES': '/usr/share/mythes'}

    for d in xcu2dicts():
        origin = origin_dirs[d.format]
        # d.files and d.locales are unordered sets.  Every file (.aff AND
        # .dic, .dat AND .idx) has to be linked for every aliased locale, so
        # walk the full cross product instead of zip()-ing the two sets
        # together, which paired them arbitrarily and dropped most
        # combinations.  Sorting keeps the output reproducible between runs.
        for location in sorted(d.files):
            # Expand the "%origin%" variable to whatever it should be.
            link_target = location.replace('%origin%', origin)
            prefix = re.fullmatch(
                r'('
                r'/usr/share/hunspell/|'
                r'/usr/share/hyphen/hyph_|'
                r'/usr/share/mythes/(?:th|thes|thesaurus)_'
                r').*',
                link_target).group(1)
            # NOTE(review): the leading '.*' is greedy, so the optional
            # '_v2' is never captured and 'th_xx_YY_v2.dat' yields the
            # suffix '.dat'.  Confirm whether links should keep '_v2'.
            suffix = re.fullmatch(
                r'.*'
                r'((?:_v2)?\.(?:dic|aff|dat|idx))',
                link_target).group(1)

            for locale in sorted(d.locales):
                link_name = (
                    prefix +
                    IETF_locale_to_glibc_locale(locale) +
                    suffix)

                # FIXME: needs to use f'-p{package}', like helper.py:generate_installs().
                if link_target != link_name:
                    print('',       # indent for make
                          'dh_link',
                          link_target,
                          link_name,
                          '# ' + locale,   # comment
                          sep='\t')


def IETF_locale_to_glibc_locale(lo_locale: str) -> str:
    """Rewrite an RFC 5646 language tag into glibc locale notation.

    The upstream XCU files use RFC 5646 notation (kmr-Latn-TR).  The upstream
    dictionaries aren't completely consistent, but mostly use glibc notation
    (ks_IN@devanagari).  libreoffice-dictionaries/debian/helper.py has a
    hand-written dict instead of this bodgy regex replacement.

    Examples: 'af-ZA' -> 'af_ZA', 'kmr-Latn-TR' -> 'kmr_TR@latin',
    'ca-ES-valencia' -> 'ca_ES@valencia', 'gl' -> 'gl'.
    """
    # Applied strictly in this order; each step works on the previous result.
    rewrites = (
        # -Latn- becomes a trailing @latin modifier  (YUK!)
        (r'(.+)-Latn(-.+)?', r'\1\2@latin'),
        # -valencia becomes an @valencia modifier  (YUK!)
        (r'(.+)-valencia', r'\1@valencia'),
        # xx-YY becomes xx_YY (only the first hyphen is rewritten)
        (r'([^-]+)-(.+)', r'\1_\2'),
    )
    glibc_locale = lo_locale
    for pattern, replacement in rewrites:
        glibc_locale = re.sub(pattern, replacement, glibc_locale)
    return glibc_locale


def xcu2dicts() -> list:
    """Scrape the dictionary definitions out of every upstream XCU file.

    Reads each 'dictionaries/*/dictionaries.xcu' below the current directory
    and returns one namespace per dictionary node, holding its format string,
    the set of file locations and the set of locales.

    Example output:
        [namespace(files={'%origin%/af_ZA.aff', '%origin%/af_ZA.dic'},
                   format='DICT_SPELL',
                   locales={'af-NA', 'af-ZA'}),
         namespace(files={'%origin%/hyph_af_ZA.dic'},
                   format='DICT_HYPH',
                   locales={'af-NA', 'af-ZA'})]

    Raises:
        ValueError: if a dictionary node does not have exactly one Format
            value (the single-element unpacking fails).
    """
    def words(node, expression, ns):
        # Collect the whitespace-separated words of every matching text node.
        return {
            word
            for text in node.xpath(expression, namespaces=ns)
            for word in text.split()}

    found = []
    for path in glob.glob('dictionaries/*/dictionaries.xcu'):
        tree = lxml.etree.parse(path)
        # Re-use the document's own prefix map so 'oor:' resolves in XPath.
        ns = tree.getroot().nsmap
        dict_nodes = tree.xpath('//node[@oor:name="Dictionaries"]/node', namespaces=ns)
        for node in dict_nodes:
            (fmt,) = node.xpath('./prop[@oor:name="Format"]/value/text()', namespaces=ns)
            locations = words(node, './prop[@oor:name="Locations"]/value/text()', ns)
            locale_tags = words(node, './prop[@oor:name="Locales"]/value/text()', ns)
            found.append(types.SimpleNamespace(
                format=fmt,
                files=locations,
                locales=locale_tags))
    return found


# Emit the dh_link lines only when run as a script, not when imported.
if __name__ == '__main__':
    main()
        dh_link /usr/share/hunspell/af_ZA.dic   /usr/share/hunspell/af_NA.dic   
# af-NA
        dh_link /usr/share/hunspell/en_GB.dic   /usr/share/hunspell/en_GH.dic   
# en-GH
        dh_link /usr/share/hunspell/en_GB.aff   /usr/share/hunspell/en_BS.aff   
# en-BS
        dh_link /usr/share/hunspell/en_ZA.aff   /usr/share/hunspell/en_ZW.aff   
# en-ZW
        dh_link /usr/share/hunspell/en_ZA.dic   /usr/share/hunspell/en_NA.dic   
# en-NA
        dh_link /usr/share/hunspell/en_US.dic   /usr/share/hunspell/en_PH.dic   
# en-PH
        dh_link /usr/share/hyphen/hyph_en_GB.dic        
/usr/share/hyphen/hyph_en_AU.dic        # en-AU
        dh_link /usr/share/hyphen/hyph_en_US.dic        
/usr/share/hyphen/hyph_en_PH.dic        # en-PH
        dh_link /usr/share/mythes/th_en_US_v2.dat       
/usr/share/mythes/th_en_AU.dat  # en-AU
        dh_link /usr/share/hunspell/gl_ES.dic   /usr/share/hunspell/gl.dic      
# gl
        dh_link /usr/share/hyphen/hyph_gl.dic   
/usr/share/hyphen/hyph_gl_ES.dic        # gl-ES
        dh_link /usr/share/mythes/thesaurus_gl.idx      
/usr/share/mythes/thesaurus_gl_ES.idx   # gl-ES
        dh_link /usr/share/hunspell/fr.dic      /usr/share/hunspell/fr_FR.dic   
# fr-FR
        dh_link /usr/share/hunspell/fr.aff      /usr/share/hunspell/fr_CH.aff   
# fr-CH
        dh_link /usr/share/hyphen/hyph_fr.dic   
/usr/share/hyphen/hyph_fr_FR.dic        # fr-FR
        dh_link /usr/share/mythes/thes_fr.dat   
/usr/share/mythes/thes_fr_FR.dat        # fr-FR
        dh_link /usr/share/mythes/thes_fr.idx   
/usr/share/mythes/thes_fr_CH.idx        # fr-CH
        dh_link /usr/share/hunspell/lt.aff      /usr/share/hunspell/lt_LT.aff   
# lt-LT
        dh_link /usr/share/hyphen/hyph_lt.dic   
/usr/share/hyphen/hyph_lt_LT.dic        # lt-LT
        dh_link /usr/share/hunspell/ar.dic      /usr/share/hunspell/ar_EG.dic   
# ar-EG
        dh_link /usr/share/hunspell/ar.aff      /usr/share/hunspell/ar_DZ.aff   
# ar-DZ
        dh_link /usr/share/mythes/th_ar.idx     /usr/share/mythes/th_ar_EG.idx  
# ar-EG
        dh_link /usr/share/mythes/th_ar.dat     /usr/share/mythes/th_ar_DZ.dat  
# ar-DZ
        dh_link /usr/share/hunspell/pt_PT.aff   /usr/share/hunspell/pt_AO.aff   
# pt-AO
        dh_link /usr/share/mythes/th_pt_PT_v2.dat       
/usr/share/mythes/th_pt_PT.dat  # pt-PT
        dh_link /usr/share/hunspell/bn_BD.dic   /usr/share/hunspell/bn_IN.dic   
# bn-IN
        dh_link /usr/share/hunspell/bs_BA.aff   /usr/share/hunspell/bs.aff      
# bs
        dh_link /usr/share/mythes/th_ro_RO_v2.dat       
/usr/share/mythes/th_ro_RO.dat  # ro-RO
        dh_link /usr/share/hunspell/tr_TR.aff   /usr/share/hunspell/tr.aff      
# tr
        dh_link /usr/share/hunspell/an_ES.aff   /usr/share/hunspell/an.aff      
# an
        dh_link /usr/share/hyphen/hyph_es_ANY.dic       
/usr/share/hyphen/hyph_es_NI.dic        # es-NI
        dh_link /usr/share/mythes/th_es_ANY_v2.dat      
/usr/share/mythes/th_es_NI.dat  # es-NI
        dh_link /usr/share/mythes/th_es_ANY_v2.idx      
/usr/share/mythes/th_es_CO.idx  # es-CO
        dh_link /usr/share/hunspell/nl_NL.dic   /usr/share/hunspell/nl_BE.dic   
# nl-BE
        dh_link /usr/share/hyphen/hyph_nl_NL.dic        
/usr/share/hyphen/hyph_nl_BE.dic        # nl-BE
        dh_link /usr/share/hunspell/is.dic      /usr/share/hunspell/is_IS.dic   
# is-IS
        dh_link /usr/share/mythes/th_is.idx     /usr/share/mythes/th_is_IS.idx  
# is-IS
        dh_link /usr/share/hunspell/bo.aff      /usr/share/hunspell/bo_CN.aff   
# bo-CN
        dh_link /usr/share/mythes/th_bg_BG_v2.dat       
/usr/share/mythes/th_bg_BG.dat  # bg-BG
        dh_link /usr/share/mythes/th_lv_LV_v2.dat       
/usr/share/mythes/th_lv_LV.dat  # lv-LV
        dh_link /usr/share/hunspell/ca.aff      /usr/share/hunspell/ca_ES.aff   
# ca-ES
        dh_link /usr/share/hunspell/ca.dic      /usr/share/hunspell/ca_IT.dic   
# ca-IT
        dh_link /usr/share/hunspell/ca-valencia.dic     
/usr/share/hunspell/ca_ES@valencia.dic  # ca-ES-valencia
        dh_link /usr/share/hyphen/hyph_ca.dic   
/usr/share/hyphen/hyph_ca_FR.dic        # ca-FR
        dh_link /usr/share/mythes/th_ca_ES_v3.idx       
/usr/share/mythes/th_ca_FR.idx  # ca-FR
        dh_link /usr/share/mythes/th_ca_ES_v3.dat       
/usr/share/mythes/th_ca_ES@valencia.dat # ca-ES-valencia
        dh_link /usr/share/hunspell/te_IN.dic   /usr/share/hunspell/te.dic      
# te
        dh_link /usr/share/mythes/th_pl_PL_v2.dat       
/usr/share/mythes/th_pl_PL.dat  # pl-PL
        dh_link /usr/share/mythes/th_id_ID_v2.idx       
/usr/share/mythes/th_id_ID.idx  # id-ID
        dh_link /usr/share/hunspell/sr.aff      /usr/share/hunspell/sr_RS.aff   
# sr-RS
        dh_link /usr/share/hunspell/sr.dic      /usr/share/hunspell/sr_CS.dic   
# sr-CS
        dh_link /usr/share/hunspell/sr-Latn.dic 
/usr/share/hunspell/sr_CS@latin.dic     # sr-Latn-CS
        dh_link /usr/share/hunspell/sr-Latn.aff 
/usr/share/hunspell/sr_RS@latin.aff     # sr-Latn-RS
        dh_link /usr/share/hyphen/hyph_sr.dic   
/usr/share/hyphen/hyph_sr_RS.dic        # sr-RS
        dh_link /usr/share/hyphen/hyph_sr-Latn.dic      
/usr/share/hyphen/hyph_sr_CS@latin.dic  # sr-Latn-CS
        dh_link /usr/share/hyphen/hyph_sv.dic   
/usr/share/hyphen/hyph_sv_SE.dic        # sv-SE
        dh_link /usr/share/mythes/th_hu_HU_v2.idx       
/usr/share/mythes/th_hu_HU.idx  # hu-HU
        dh_link /usr/share/mythes/th_ru_RU_v2.idx       
/usr/share/mythes/th_ru_RU.idx  # ru-RU
        dh_link /usr/share/mythes/th_it_IT_v2.dat       
/usr/share/mythes/th_it_IT.dat  # it-IT
        dh_link /usr/share/mythes/th_nb_NO_v2.dat       
/usr/share/mythes/th_nb_NO.dat  # nb-NO
        dh_link /usr/share/mythes/th_nn_NO_v2.dat       
/usr/share/mythes/th_nn_NO.dat  # nn-NO
        dh_link /usr/share/mythes/th_sk_SK_v2.idx       
/usr/share/mythes/th_sk_SK.idx  # sk-SK
        dh_link /usr/share/hunspell/gug.aff     /usr/share/hunspell/gug_PY.aff  
# gug-PY
        dh_link /usr/share/hunspell/kmr_Latn.aff        
/usr/share/hunspell/kmr_SY@latin.aff    # kmr-Latn-SY
        dh_link /usr/share/hunspell/kmr_Latn.dic        
/usr/share/hunspell/kmr_TR@latin.dic    # kmr-Latn-TR
        dh_link /usr/share/hunspell/de_AT_frami.aff     
/usr/share/hunspell/de_AT.aff   # de-AT
        dh_link /usr/share/hunspell/de_CH_frami.dic     
/usr/share/hunspell/de_CH.dic   # de-CH
        dh_link /usr/share/hunspell/de_DE_frami.aff     
/usr/share/hunspell/de_DE.aff   # de-DE
        dh_link /usr/share/mythes/th_de_DE_v2.dat       
/usr/share/mythes/th_de_AT.dat  # de-AT
        dh_link /usr/share/mythes/th_de_CH_v2.dat       
/usr/share/mythes/th_de_CH.dat  # de-CH
        dh_link /usr/share/mythes/th_sl_SI_v2.dat       
/usr/share/mythes/th_sl_SI.dat  # sl-SI
        dh_link /usr/share/mythes/th_ne_NP_v2.dat       
/usr/share/mythes/th_ne_NP.dat  # ne-NP

Reply via email to