About the <unicase.h> functions, Paolo Bonzini wrote in
<http://lists.gnu.org/archive/html/bug-libunistring/2009-04/msg00001.html>:

> It seems to me that there is a limitation, in that you cannot turn to
> lowercase/uppercase/titlecase parts of a string; for that you have to
> use uc_toupper/lower/title and forget about the locale-specific mappings.

This resolves this limitation. I'm adding functions to case-convert part of a string, within the context of the entire string. The main change is below. For the documentation, please look in libunistring: <http://git.savannah.gnu.org/gitweb/?p=libunistring.git;a=commitdiff;h=116c15a81610cdeab05883d1a8e5149c1964ab20#patch5> *** lib/unicase.h.orig 2009-06-29 21:47:28.000000000 +0200 --- lib/unicase.h 2009-06-29 21:47:00.000000000 +0200 *************** *** 134,139 **** --- 134,269 ---- uninorm_t nf, uint32_t *resultbuf, size_t *lengthp); + /* The case-mapping context given by a prefix string. */ + typedef struct casing_prefix_context + { + /* These fields are private, undocumented. */ + uint32_t last_char_except_ignorable; + uint32_t last_char_normal_or_above; + } + casing_prefix_context_t; + /* The case-mapping context of the empty prefix string. */ + extern const casing_prefix_context_t unicase_empty_prefix_context; + /* Return the case-mapping context of a given prefix string. */ + extern casing_prefix_context_t + u8_casing_prefix_context (const uint8_t *s, size_t n); + extern casing_prefix_context_t + u16_casing_prefix_context (const uint16_t *s, size_t n); + extern casing_prefix_context_t + u32_casing_prefix_context (const uint32_t *s, size_t n); + /* Return the case-mapping context of the prefix concat(A, S), given the + case-mapping context of the prefix A. */ + extern casing_prefix_context_t + u8_casing_prefixes_context (const uint8_t *s, size_t n, + casing_prefix_context_t a_context); + extern casing_prefix_context_t + u16_casing_prefixes_context (const uint16_t *s, size_t n, + casing_prefix_context_t a_context); + extern casing_prefix_context_t + u32_casing_prefixes_context (const uint32_t *s, size_t n, + casing_prefix_context_t a_context); + + /* The case-mapping context given by a suffix string. */ + typedef struct casing_suffix_context + { + /* These fields are private, undocumented. 
*/ + uint32_t bits; + uint32_t unused_bits; + } + casing_suffix_context_t; + /* The case-mapping context of the empty suffix string. */ + extern const casing_suffix_context_t unicase_empty_suffix_context; + /* Return the case-mapping context of a given suffix string. */ + extern casing_suffix_context_t + u8_casing_suffix_context (const uint8_t *s, size_t n); + extern casing_suffix_context_t + u16_casing_suffix_context (const uint16_t *s, size_t n); + extern casing_suffix_context_t + u32_casing_suffix_context (const uint32_t *s, size_t n); + /* Return the case-mapping context of the suffix concat(S, A), given the + case-mapping context of the suffix A. */ + extern casing_suffix_context_t + u8_casing_suffixes_context (const uint8_t *s, size_t n, + casing_suffix_context_t a_context); + extern casing_suffix_context_t + u16_casing_suffixes_context (const uint16_t *s, size_t n, + casing_suffix_context_t a_context); + extern casing_suffix_context_t + u32_casing_suffixes_context (const uint32_t *s, size_t n, + casing_suffix_context_t a_context); + + /* Return the uppercase mapping of a string that is surrounded by a prefix + and a suffix. */ + extern uint8_t * + u8_ct_toupper (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); + extern uint16_t * + u16_ct_toupper (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); + extern uint32_t * + u32_ct_toupper (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); + + /* Return the lowercase mapping of a string that is surrounded by a prefix + and a suffix. 
*/ + extern uint8_t * + u8_ct_tolower (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); + extern uint16_t * + u16_ct_tolower (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); + extern uint32_t * + u32_ct_tolower (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); + + /* Return the titlecase mapping of a string that is surrounded by a prefix + and a suffix. */ + extern uint8_t * + u8_ct_totitle (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); + extern uint16_t * + u16_ct_totitle (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); + extern uint32_t * + u32_ct_totitle (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); + /* Return the case folded string. Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent to comparing S1 and S2 with uN_casecmp(). *************** *** 151,156 **** --- 281,308 ---- u32_casefold (const uint32_t *s, size_t n, const char *iso639_language, uninorm_t nf, uint32_t *resultbuf, size_t *lengthp); + /* Likewise, for a string that is surrounded by a prefix and a suffix. 
*/ + extern uint8_t * + u8_ct_casefold (const uint8_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); + extern uint16_t * + u16_ct_casefold (const uint16_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); + extern uint32_t * + u32_ct_casefold (const uint32_t *s, size_t n, + casing_prefix_context_t prefix_context, + casing_suffix_context_t suffix_context, + const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); /* Compare S1 and S2, ignoring differences in case and normalization. The nf argument identifies the normalization form to apply after the 2009-06-29 Bruno Haible <br...@clisp.org> Define u32_casefold as a wrapper around u32_ct_casefold. * lib/unicase/u32-casefold.c: Update. * modules/unicase/u32-casefold (Depends-on): Add unicase/u32-ct-casefold, unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up. Define u16_casefold as a wrapper around u16_ct_casefold. * lib/unicase/u16-casefold.c: Update. * modules/unicase/u16-casefold (Depends-on): Add unicase/u16-ct-casefold, unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up. Define u8_casefold as a wrapper around u8_ct_casefold. * lib/unicase/u-casefold.h (FUNC): Delegate to U_CT_CASEFOLD. * lib/unicase/u8-casefold.c: Update. * modules/unicase/u8-casefold (Depends-on): Add unicase/u8-ct-casefold, unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up. Define u32_totitle as a wrapper around u32_ct_totitle. * lib/unicase/u32-totitle.c: Update. * modules/unicase/u32-totitle (Depends-on): Add unicase/u32-ct-totitle, unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up. Define u16_totitle as a wrapper around u16_ct_totitle. * lib/unicase/u16-totitle.c: Update. 
* modules/unicase/u16-totitle (Depends-on): Add unicase/u16-ct-totitle, unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up. Define u8_totitle as a wrapper around u8_ct_totitle. * lib/unicase/u-totitle.h (is_cased, is_case_ignorable): Remove functions. (FUNC): Delegate to U_CT_TOTITLE. * lib/unicase/u8-totitle.c: Update. * modules/unicase/u8-totitle (Depends-on): Add unicase/u8-ct-totitle, unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up. * lib/unicase/u32-tolower.c (u32_tolower): Update u32_casemap invocation. * modules/unicase/u32-tolower (Depends-on): Add unicase/empty-prefix-context, unicase/empty-suffix-context. * lib/unicase/u16-tolower.c (u16_tolower): Update u16_casemap invocation. * modules/unicase/u16-tolower (Depends-on): Add unicase/empty-prefix-context, unicase/empty-suffix-context. * lib/unicase/u8-tolower.c (u8_tolower): Update u8_casemap invocation. * modules/unicase/u8-tolower (Depends-on): Add unicase/empty-prefix-context, unicase/empty-suffix-context. * lib/unicase/u32-toupper.c (u32_toupper): Update u32_casemap invocation. * modules/unicase/u32-toupper (Depends-on): Add unicase/empty-prefix-context, unicase/empty-suffix-context. * lib/unicase/u16-toupper.c (u16_toupper): Update u16_casemap invocation. * modules/unicase/u16-toupper (Depends-on): Add unicase/empty-prefix-context, unicase/empty-suffix-context. * lib/unicase/u8-toupper.c (u8_toupper): Update u8_casemap invocation. * modules/unicase/u8-toupper (Depends-on): Add unicase/empty-prefix-context, unicase/empty-suffix-context. New module 'unicase/u32-ct-casefold'. * lib/unicase/u32-ct-casefold.c: New file. * modules/unicase/u32-ct-casefold: New file. New module 'unicase/u16-ct-casefold'. * lib/unicase/u16-ct-casefold.c: New file. * modules/unicase/u16-ct-casefold: New file. New module 'unicase/u8-ct-casefold'. * lib/unicase/u8-ct-casefold.c: New file. * lib/unicase/u-ct-casefold.h: New file, derived from lib/unicase/u-casefold.h. 
* modules/unicase/u8-ct-casefold: New file. New module 'unicase/u32-ct-totitle'. * lib/unicase/u32-ct-totitle.c: New file. * modules/unicase/u32-ct-totitle: New file. New module 'unicase/u16-ct-totitle'. * lib/unicase/u16-ct-totitle.c: New file. * modules/unicase/u16-ct-totitle: New file. New module 'unicase/u8-ct-totitle'. * lib/unicase/u8-ct-totitle.c: New file. * lib/unicase/u-ct-totitle.h: New file, derived from lib/unicase/u-totitle.h. * modules/unicase/u8-ct-totitle: New file. New module 'unicase/u32-ct-tolower'. * lib/unicase/u32-ct-tolower.c: New file. * modules/unicase/u32-ct-tolower: New file. New module 'unicase/u16-ct-tolower'. * lib/unicase/u16-ct-tolower.c: New file. * modules/unicase/u16-ct-tolower: New file. New module 'unicase/u8-ct-tolower'. * lib/unicase/u8-ct-tolower.c: New file. * modules/unicase/u8-ct-tolower: New file. New module 'unicase/u32-ct-toupper'. * lib/unicase/u32-ct-toupper.c: New file. * modules/unicase/u32-ct-toupper: New file. New module 'unicase/u16-ct-toupper'. * lib/unicase/u16-ct-toupper.c: New file. * modules/unicase/u16-ct-toupper: New file. New module 'unicase/u8-ct-toupper'. * lib/unicase/u8-ct-toupper.c: New file. * modules/unicase/u8-ct-toupper: New file. Add context arguments to u*_casemap functions. * lib/unicase/unicasemap.h: Include unicase.h. (u8_casemap, u16_casemap, u32_casemap): Add prefix_context and suffix_context arguments. * lib/unicase/u-casemap.h (is_cased, is_case_ignorable): Remove functions. (FUNC): Add prefix_context and suffix_context arguments. Use uc_is_cased and uc_is_case_ignorable. * lib/unicase/u8-casemap.c: Include caseprop.h and context.h. * lib/unicase/u16-casemap.c: Likewise. * lib/unicase/u32-casemap.c: Likewise. * modules/unicase/u8-casemap (Files): Add lib/unicase/context.h. (Depends-on): Add unicase/cased, unicase/ignorable. Clean up. * modules/unicase/u16-casemap (Files): Add lib/unicase/context.h. (Depends-on): Add unicase/cased, unicase/ignorable. Clean up. 
* modules/unicase/u32-casemap (Files): Add lib/unicase/context.h. (Depends-on): Add unicase/cased, unicase/ignorable. Clean up. New module 'unicase/u32-suffix-context'. * lib/unicase/u32-suffix-context.c: New file. * modules/unicase/u32-suffix-context: New file. New module 'unicase/u16-suffix-context'. * lib/unicase/u16-suffix-context.c: New file. * modules/unicase/u16-suffix-context: New file. New module 'unicase/u8-suffix-context'. * lib/unicase/u8-suffix-context.c: New file. * lib/unicase/u-suffix-context.h: New file. * modules/unicase/u8-suffix-context: New file. New module 'unicase/empty-suffix-context'. * lib/unicase/empty-suffix-context.c: New file. * modules/unicase/empty-suffix-context: New file. New module 'unicase/u32-prefix-context'. * lib/unicase/u32-prefix-context.c: New file. * modules/unicase/u32-prefix-context: New file. New module 'unicase/u16-prefix-context'. * lib/unicase/u16-prefix-context.c: New file. * modules/unicase/u16-prefix-context: New file. New module 'unicase/u8-prefix-context'. * lib/unicase/u8-prefix-context.c: New file. * lib/unicase/u-prefix-context.h: New file. * lib/unicase/context.h: New file. * modules/unicase/u8-prefix-context: New file. New module 'unicase/empty-prefix-context'. * lib/unicase/empty-prefix-context.c: New file. * modules/unicase/empty-prefix-context: New file. New module 'unicase/ignorable'. * lib/unicase/ignorable.c: New file. * modules/unicase/ignorable: New file. New module 'unicase/cased'. * lib/unicase/caseprop.h: New file. * lib/unicase/cased.c: New file. * modules/unicase/cased: New file. New functions for case mapping of substrings. * lib/unicase.h (casing_prefix_context_t): New type. (unicase_empty_prefix_context): New variable. (u8_casing_prefix_context, u16_casing_prefix_context, u32_casing_prefix_context, u8_casing_prefixes_context, u16_casing_prefixes_context, u32_casing_prefixes_context): New declarations. (casing_suffix_context_t): New type. (unicase_empty_suffix_context): New variable. 
(u8_casing_suffix_context, u16_casing_suffix_context, u32_casing_suffix_context, u8_casing_suffixes_context, u16_casing_suffixes_context, u32_casing_suffixes_context, u8_ct_toupper, u16_ct_toupper, u32_ct_toupper, u8_ct_tolower, u16_ct_tolower, u32_ct_tolower, u8_ct_totitle, u16_ct_totitle, u32_ct_totitle, u8_ct_casefold, u16_ct_casefold, u32_ct_casefold): New declarations.