Hello Georg, > However, to eliminate the warning, the code also can be simplified like this: > > ``` > diff --git a/lib/trim.c b/lib/trim.c > index 162c43e1de..bfd743cf0f 100644 > --- a/lib/trim.c > +++ b/lib/trim.c > @@ -30,12 +30,6 @@ > #include "mbiter.h" > #include "xalloc.h" > > -/* Use this to suppress gcc's "...may be used before initialized" warnings. > */ > -#if defined GCC_LINT || defined lint > -# define IF_LINT(Code) Code > -#else > -# define IF_LINT(Code) /* empty */ > -#endif > > char * > trim2 (const char *s, int how) > @@ -66,7 +60,6 @@ trim2 (const char *s, int how) > if (how != TRIM_LEADING) > { > unsigned int state = 0; > - char *r IF_LINT (= NULL); /* used only while state = 2 */ > > mbi_init (i, d, strlen (d)); > > @@ -87,7 +80,7 @@ trim2 (const char *s, int how) > if (state == 1 && mb_isspace (mbi_cur (i))) > { > state = 2; > - r = (char *) mbi_cur_ptr (i); > + *(char *) mbi_cur_ptr (i) = 0; > } > else if (state == 2 && mb_isspace (mbi_cur (i))) > { > @@ -98,9 +91,6 @@ trim2 (const char *s, int how) > state = 1; > } > } > - > - if (state == 2) > - *r = '\0'; > } > } > else > ``` > > In that way the warning is reliably eliminated, there is no need anymore > for clever macro tricks and arguably the code is now easier to grasp.
Thank you for the constructive proposal. Before modifying any algorithmic code, I want to have a unit test — because otherwise the risk of regression too high. So, I added unit tests that A. test a variety of ASCII strings with ASCII spaces in the C locale, B. test the same thing in multibyte locales, C. test additionally some Unicode spaces in these multibyte locales. What I saw is that the tests B fail: PASS: test-trim1.sh FAIL: test-trim2.sh FAIL: test-trim3.sh FAIL: test-trim2.sh =================== test-trim.c:54: assertion 'strcmp (result, "") == 0' failed Aborted FAIL test-trim2.sh (exit status: 134) FAIL: test-trim3.sh =================== test-trim.c:54: assertion 'strcmp (result, "") == 0' failed Aborted FAIL test-trim3.sh (exit status: 134) In other words, the function is already buggy since the beginning, since it produces different results for ASCII inputs depending on whether the locale is multibyte or unibyte! After applying your patch, the same tests continued to fail. So, I went for a more radical simplification. It does not elicit a warning any more. Thanks for your input; without it, we would still be sitting on this bug in the future... 2023-04-02 Bruno Haible <br...@clisp.org> trim: Fix trim_trailing result in multibyte locales. * lib/trim.c (trim2): Simplify algorithm for trim_trailing in multibyte locales, to use 2 instead of 3 states. trim: Add tests. * tests/test-trim.c: New file. * tests/test-trim1.sh: New file. * tests/test-trim2.sh: New file. * tests/test-trim3.sh: New file. * modules/trim-tests: New file.
>From 200a5192f650ab3063a3971790c15a65d5c31b2e Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Sun, 2 Apr 2023 20:40:18 +0200 Subject: [PATCH 1/6] trim: Add tests. * tests/test-trim.c: New file. * tests/test-trim1.sh: New file. * tests/test-trim2.sh: New file. * tests/test-trim3.sh: New file. * modules/trim-tests: New file. --- ChangeLog | 9 +++ modules/trim-tests | 22 ++++++ tests/test-trim.c | 158 ++++++++++++++++++++++++++++++++++++++++++++ tests/test-trim1.sh | 5 ++ tests/test-trim2.sh | 15 +++++ tests/test-trim3.sh | 15 +++++ 6 files changed, 224 insertions(+) create mode 100644 modules/trim-tests create mode 100644 tests/test-trim.c create mode 100644 tests/test-trim1.sh create mode 100644 tests/test-trim2.sh create mode 100644 tests/test-trim3.sh diff --git a/ChangeLog b/ChangeLog index 463274abee..a559f3dd71 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2023-04-02 Bruno Haible <br...@clisp.org> + + trim: Add tests. + * tests/test-trim.c: New file. + * tests/test-trim1.sh: New file. + * tests/test-trim2.sh: New file. + * tests/test-trim3.sh: New file. + * modules/trim-tests: New file. + 2023-04-02 Bruno Haible <br...@clisp.org> unistr/u8-strstr: Simplify code. diff --git a/modules/trim-tests b/modules/trim-tests new file mode 100644 index 0000000000..326b800c3f --- /dev/null +++ b/modules/trim-tests @@ -0,0 +1,22 @@ +Files: +tests/test-trim1.sh +tests/test-trim2.sh +tests/test-trim3.sh +tests/test-trim.c +tests/macros.h +m4/locale-fr.m4 +m4/locale-zh.m4 +m4/codeset.m4 + +Depends-on: +setlocale + +configure.ac: + +Makefile.am: +TESTS += test-trim1.sh test-trim2.sh test-trim3.sh +TESTS_ENVIRONMENT += \ + LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ + LOCALE_ZH_CN='@LOCALE_ZH_CN@' +check_PROGRAMS += test-trim +test_trim_LDADD = $(LDADD) $(LIBUNISTRING) $(MBRTOWC_LIB) diff --git a/tests/test-trim.c b/tests/test-trim.c new file mode 100644 index 0000000000..e2f5043ee7 --- /dev/null +++ b/tests/test-trim.c @@ -0,0 +1,158 @@ +/* Tests of removing leading and/or trailing whitespaces. + Copyright (C) 2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <br...@clisp.org>, 2023. */ + +#include <config.h> + +/* Specification. */ +#include "trim.h" + +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "macros.h" + +static void +test_ascii (void) +{ + { + char *result = trim (""); + ASSERT (strcmp (result, "") == 0); + free (result); + result = trim_leading (""); + ASSERT (strcmp (result, "") == 0); + free (result); + result = trim_trailing (""); + ASSERT (strcmp (result, "") == 0); + free (result); + } + + { + char *result = trim (" "); + ASSERT (strcmp (result, "") == 0); + free (result); + result = trim_leading (" "); + ASSERT (strcmp (result, "") == 0); + free (result); + result = trim_trailing (" "); + ASSERT (strcmp (result, "") == 0); + free (result); + } + + { + char *result = trim ("Hello world"); + ASSERT (strcmp (result, "Hello world") == 0); + free (result); + result = trim_leading ("Hello world"); + ASSERT (strcmp (result, "Hello world") == 0); + free (result); + result = trim_trailing ("Hello world"); + ASSERT (strcmp (result, "Hello world") == 0); + free (result); + } + + { + char *result = trim (" Hello world"); + ASSERT (strcmp (result, "Hello world") == 0); + free (result); + result = trim_leading (" Hello world"); + ASSERT (strcmp (result, "Hello world") == 0); + free (result); + result = trim_trailing (" Hello world"); + ASSERT (strcmp (result, " Hello world") == 0); + free (result); + } + + { + char *result = trim ("Hello world "); + ASSERT (strcmp (result, "Hello world") == 0); + free (result); + result = trim_leading ("Hello world "); + ASSERT (strcmp (result, "Hello world ") == 0); + free (result); + result = trim_trailing ("Hello world "); + ASSERT (strcmp (result, "Hello world") == 0); + free (result); + } + + { + char *result = trim (" Hello world "); + ASSERT (strcmp (result, "Hello world") == 0); + free (result); + result = trim_leading (" Hello world "); + ASSERT (strcmp (result, "Hello world ") == 0); + free (result); + result = trim_trailing (" Hello world "); + ASSERT (strcmp (result, " Hello world") == 0); + free (result); + } +} + +int +main (int argc, char *argv[]) +{ + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + /* Test ASCII arguments. */ + test_ascii (); + + if (argc > 1) + switch (argv[1][0]) + { + case '1': + /* C or POSIX locale. */ + return 0; + + case '2': + /* Locale encoding is UTF-8. */ + { /* U+2002 EN SPACE */ + char *result = trim ("\342\200\202\302\267foo\342\200\202"); + ASSERT (strcmp (result, "\302\267foo") == 0); + free (result); + } + { /* U+3000 IDEOGRAPHIC SPACE */ + char *result = trim ("\343\200\200\302\267foo\343\200\200"); + ASSERT (strcmp (result, "\302\267foo") == 0); + free (result); + } + return 0; + + case '3': + /* Locale encoding is GB18030. */ + #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun) + { /* U+2002 EN SPACE */ + char *result = trim ("\201\066\243\070\241\244foo\201\066\243\070"); + ASSERT (strcmp (result, "\241\244foo") == 0); + free (result); + } + #endif + #if !(defined __FreeBSD__ || defined __DragonFly__) + { /* U+3000 IDEOGRAPHIC SPACE */ + char *result = trim ("\241\241\241\244foo\241\241"); + ASSERT (strcmp (result, "\241\244foo") == 0); + free (result); + } + #endif + return 0; + } + + return 1; +} diff --git a/tests/test-trim1.sh b/tests/test-trim1.sh new file mode 100644 index 0000000000..7d6ffe0ace --- /dev/null +++ b/tests/test-trim1.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# Test in the "C" or "POSIX" locale. +LC_ALL=C \ +${CHECKER} ./test-trim${EXEEXT} 1 diff --git a/tests/test-trim2.sh b/tests/test-trim2.sh new file mode 100644 index 0000000000..1c14bd5d1a --- /dev/null +++ b/tests/test-trim2.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific UTF-8 locale is installed. +: "${LOCALE_FR_UTF8=fr_FR.UTF-8}" +if test $LOCALE_FR_UTF8 = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no french Unicode locale is installed" + else + echo "Skipping test: no french Unicode locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR_UTF8 \ +${CHECKER} ./test-trim${EXEEXT} 2 diff --git a/tests/test-trim3.sh b/tests/test-trim3.sh new file mode 100644 index 0000000000..1da4307a79 --- /dev/null +++ b/tests/test-trim3.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific GB18030 locale is installed. +: "${LOCALE_ZH_CN=zh_CN.GB18030}" +if test $LOCALE_ZH_CN = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no transitional chinese locale is installed" + else + echo "Skipping test: no transitional chinese locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_ZH_CN \ +${CHECKER} ./test-trim${EXEEXT} 3 -- 2.34.1
>From 4682328ced5f3d71b80b993a2fd3feeca0ffd250 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Sun, 2 Apr 2023 21:03:55 +0200 Subject: [PATCH 2/6] trim: Fix trim_trailing result in multibyte locales. * lib/trim.c (trim2): Simplify algorithm for trim_trailing in multibyte locales, to use 2 instead of 3 states. --- ChangeLog | 4 ++++ lib/trim.c | 43 +++++++++++-------------------------------- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index a559f3dd71..30e39d757a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2023-04-02 Bruno Haible <br...@clisp.org> + trim: Fix trim_trailing result in multibyte locales. + * lib/trim.c (trim2): Simplify algorithm for trim_trailing in multibyte + locales, to use 2 instead of 3 states. + trim: Add tests. * tests/test-trim.c: New file. * tests/test-trim1.sh: New file. diff --git a/lib/trim.c b/lib/trim.c index 162c43e1de..ed643cd20c 100644 --- a/lib/trim.c +++ b/lib/trim.c @@ -65,42 +65,21 @@ trim2 (const char *s, int how) /* Trim trailing whitespaces. */ if (how != TRIM_LEADING) { - unsigned int state = 0; - char *r IF_LINT (= NULL); /* used only while state = 2 */ + char *start_of_spaces = NULL; mbi_init (i, d, strlen (d)); for (; mbi_avail (i); mbi_advance (i)) - { - if (state == 0 && mb_isspace (mbi_cur (i))) - continue; - - if (state == 0 && !mb_isspace (mbi_cur (i))) - { - state = 1; - continue; - } - - if (state == 1 && !mb_isspace (mbi_cur (i))) - continue; - - if (state == 1 && mb_isspace (mbi_cur (i))) - { - state = 2; - r = (char *) mbi_cur_ptr (i); - } - else if (state == 2 && mb_isspace (mbi_cur (i))) - { - /* empty */ - } - else - { - state = 1; - } - } - - if (state == 2) - *r = '\0'; + if (mb_isspace (mbi_cur (i))) + { + if (start_of_spaces == NULL) + start_of_spaces = (char *) mbi_cur_ptr (i); + } + else + start_of_spaces = NULL; + + if (start_of_spaces != NULL) + *start_of_spaces = '\0'; } } else -- 2.34.1