On 05.04.2012 14:32, Bruno Haible wrote:
> Hi Vladimir,
>
>> I'm not sure if previous time I sent with or without \0 bugfix. Resending
>
> - The function __argp_get_display_len looks very similar to mbsnwidth(),
>   from module 'mbswidth'. Could you use that function? One of the gnulib
>   principles is to reuse code that is already in gnulib, where it makes sense.

What remains is the issue caused by escape sequences.

> - You have shown a test case as a Cyrillic string. But what is the C code
>   to make the behaviour explicit? Could you add this code to
>   tests/test-argp.c,
>   or create a new test file tests/test-argp-3.c?

Done, but the test is valid only in UTF-8 locales. Should I force some specific locale? It's impossible to make the test work in all locales, since in, e.g., an ASCII locale such characters do not exist at all.
-- Regards Vladimir 'φ-coder/phcoder' Serbinenko
diff --git a/lib/argp-fmtstream.c b/lib/argp-fmtstream.c index 70e3eb8..d47e75f 100644 --- a/lib/argp-fmtstream.c +++ b/lib/argp-fmtstream.c @@ -29,6 +29,7 @@ #include <errno.h> #include <stdarg.h> #include <ctype.h> +#include <wchar.h> #include "argp-fmtstream.h" #include "argp-namefrob.h" @@ -116,6 +117,70 @@ weak_alias (__argp_fmtstream_free, argp_fmtstream_free) #endif #endif + +/* Return the space needed to display the range + [beg, min (end, beg + strlen (beg))). */ +size_t +__argp_get_display_width (const char *beg, const char *end) +{ + const char *ptr; + size_t r = 0; + mbstate_t ps; + + memset (&ps, 0, sizeof (ps)); + + for (ptr = beg; *ptr && ptr < end; ) + { + wchar_t wc; + size_t s; + + s = mbrtowc (&wc, ptr, end - ptr, &ps); + if (s == (size_t) -1) + break; + if (s == (size_t) -2) + { + ptr++; + r++; + continue; + } + r += wcwidth (wc); + ptr += s; + } + return r; +} + +/* Return the pointer to the first character that doesn't fit in l columns. */ +static inline const ptrdiff_t +add_width (const char *ptr, const char *end, size_t l) +{ + mbstate_t ps; + const char *ptr0 = ptr; + + memset (&ps, 0, sizeof (ps)); + + while (ptr < end) + { + wchar_t wc; + size_t s, k; + + s = mbrtowc (&wc, ptr, end - ptr, &ps); + if (s == (size_t) -1) + break; + if (s == (size_t) -2) + { + s = 1; + k = 1; + } + else + k = wcwidth (wc); + if (k >= l) + break; + l -= k; + ptr += s; + } + return ptr - ptr0; +} + /* Process FS's buffer so that line wrapping is done from POINT_OFFS to the end of its buffer. This code is mostly from glibc stdio/linewrap.c. */ void @@ -168,14 +233,15 @@ __argp_fmtstream_update (argp_fmtstream_t fs) if (!nl) { + size_t display_width = __argp_get_display_width (buf, fs->p); /* The buffer ends in a partial line. */ - if (fs->point_col + len < fs->rmargin) + if (fs->point_col + display_width < fs->rmargin) { /* The remaining buffer text is a partial line and fits within the maximum line width. 
Advance point for the characters to be written and stop scanning. */ - fs->point_col += len; + fs->point_col += display_width; break; } else @@ -183,14 +249,18 @@ __argp_fmtstream_update (argp_fmtstream_t fs) the end of the buffer. */ nl = fs->p; } - else if (fs->point_col + (nl - buf) < (ssize_t) fs->rmargin) - { - /* The buffer contains a full line that fits within the maximum - line width. Reset point and scan the next line. */ - fs->point_col = 0; - buf = nl + 1; - continue; - } + else + { + size_t display_width = __argp_get_display_width (buf, nl); + if (display_width < (ssize_t) fs->rmargin) + { + /* The buffer contains a full line that fits within the maximum + line width. Reset point and scan the next line. */ + fs->point_col = 0; + buf = nl + 1; + continue; + } + } /* This line is too long. */ r = fs->rmargin - 1; @@ -226,7 +296,7 @@ __argp_fmtstream_update (argp_fmtstream_t fs) char *p, *nextline; int i; - p = buf + (r + 1 - fs->point_col); + p = buf + add_width (buf, fs->p, (r + 1 - fs->point_col)); while (p >= buf && !isblank ((unsigned char) *p)) --p; nextline = p + 1; /* This will begin the next line. */ @@ -244,7 +314,7 @@ __argp_fmtstream_update (argp_fmtstream_t fs) { /* A single word that is greater than the maximum line width. Oh well. Put it on an overlong line by itself. */ - p = buf + (r + 1 - fs->point_col); + p = buf + add_width (buf, fs->p, (r + 1 - fs->point_col)); /* Find the end of the long word. */ if (p < nl) do @@ -278,7 +348,7 @@ __argp_fmtstream_update (argp_fmtstream_t fs) && fs->p > nextline) { /* The margin needs more blanks than we removed. */ - if (fs->end - fs->p > fs->wmargin + 1) + if (__argp_get_display_width (fs->p, fs->end) > fs->wmargin + 1) /* Make some space for them. 
*/ { size_t mv = fs->p - nextline; diff --git a/lib/argp-fmtstream.h b/lib/argp-fmtstream.h index a891c3d..618e1f1 100644 --- a/lib/argp-fmtstream.h +++ b/lib/argp-fmtstream.h @@ -349,6 +349,9 @@ __argp_fmtstream_point (argp_fmtstream_t __fs) #endif /* __OPTIMIZE__ */ +size_t +__argp_get_display_width (const char *beg, const char *end); + #endif /* ARGP_FMTSTREAM_USE_LINEWRAP */ #endif /* argp-fmtstream.h */ diff --git a/lib/argp-help.c b/lib/argp-help.c index a126acb..fe3db39 100644 --- a/lib/argp-help.c +++ b/lib/argp-help.c @@ -1451,7 +1451,7 @@ argp_args_usage (const struct argp *argp, const struct argp_state *state, /* Manually do line wrapping so that it (probably) won't get wrapped at any embedded spaces. */ - space (stream, 1 + nl - cp); + space (stream, 1 + __argp_get_display_width (cp, nl)); __argp_fmtstream_write (stream, cp, nl - cp); } diff --git a/tests/test-argp-2.sh b/tests/test-argp-2.sh index 49eff9c..935cce5 100755 --- a/tests/test-argp-2.sh +++ b/tests/test-argp-2.sh @@ -33,10 +33,11 @@ func_compare() { #### # Test --usage output cat > $TMP <<EOT -Usage: test-argp [-tvCSOlp?V] [-f FILE] [-r FILE] [-o[ARG]] [--test] - [--file=FILE] [--input=FILE] [--read=FILE] [--verbose] [--cantiga] - [--sonet] [--option] [--optional[=ARG]] [--limerick] [--poem] - [--help] [--usage] [--version] ARGS... +Usage: test-argp [-tvCSOlp?V] [-c Кириллица] [-f FILE] [-r FILE] [-o[ARG]] + [--русский=Кириллица] [--test] [--file=FILE] [--input=FILE] + [--read=FILE] [--verbose] [--cantiga] [--sonet] [--option] + [--optional[=ARG]] [--limerick] [--poem] [--help] [--usage] + [--version] ARGS... EOT ./test-argp$EXEEXT --usage | func_compare || ERR=1 @@ -45,10 +46,10 @@ EOT # Test working usage-indent format cat > $TMP <<EOT -Usage: test-argp [-tvCSOlp?V] [-f FILE] [-r FILE] [-o[ARG]] [--test] -[--file=FILE] [--input=FILE] [--read=FILE] [--verbose] [--cantiga] [--sonet] -[--option] [--optional[=ARG]] [--limerick] [--poem] [--help] [--usage] -[--version] ARGS... 
+Usage: test-argp [-tvCSOlp?V] [-c Кириллица] [-f FILE] [-r FILE] [-o[ARG]] +[--русский=Кириллица] [--test] [--file=FILE] [--input=FILE] [--read=FILE] +[--verbose] [--cantiga] [--sonet] [--option] [--optional[=ARG]] [--limerick] +[--poem] [--help] [--usage] [--version] ARGS... EOT ARGP_HELP_FMT='usage-indent=0' ./test-argp$EXEEXT --usage | func_compare || ERR=1 @@ -60,6 +61,8 @@ Usage: test-argp [OPTION...] ARGS... documentation string Main options + -c, --русский=Кириллица Длинный текст, достаточно длинный, чтобы пришлось + перенести его на вторую строку -t, --test Option Group 1 diff --git a/tests/test-argp.c b/tests/test-argp.c index 6ba55e9..1a3f7dc 100644 --- a/tests/test-argp.c +++ b/tests/test-argp.c @@ -22,6 +22,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <locale.h> #if HAVE_STRINGS_H # include <strings.h> #endif @@ -213,6 +214,8 @@ struct argp_child group2_1_child = { static struct argp_option main_options[] = { { NULL, 0, NULL, 0, "Main options", 0 }, { "test", 't', NULL, 0, NULL, 1 }, + { "русский", 'c', "Кириллица", 0, "Длинный текст, достаточно длинный, " + "чтобы пришлось перенести его на вторую строку", 1}, { NULL, 0, NULL, 0, NULL, 0 } }; @@ -459,6 +462,7 @@ main (int argc, char **argv) struct argp_child argp_children[3], group1_children[2], group2_children[2]; test_fp *fun; + setlocale (LC_ALL, ""); set_program_name (argv[0]); group1_children[0] = group1_1_child;
signature.asc
Description: OpenPGP digital signature