Sometimes it is useful to be able to compare two string descriptors case-insensitively, like c_strcasecmp.
2024-10-13 Bruno Haible <br...@clisp.org> string-desc: New function string_desc_c_casecmp. * lib/string-desc.h: New declaration. * lib/string-desc.c: Include <limits.h>, c-ctype.h. (string_desc_c_casecmp): New function. * modules/string-desc (Depends-on): Add c-ctype. * tests/test-string-desc.c (main): Add tests of string_desc_c_casecmp. diff --git a/lib/string-desc.c b/lib/string-desc.c index 58a87e1220..78992729bb 100644 --- a/lib/string-desc.c +++ b/lib/string-desc.c @@ -25,10 +25,12 @@ /* Specification and inline definitions. */ #include "string-desc.h" +#include <limits.h> #include <stdarg.h> #include <stdlib.h> #include <string.h> +#include "c-ctype.h" #include "ialloc.h" #include "full-write.h" @@ -83,6 +85,28 @@ string_desc_cmp (string_desc_t a, string_desc_t b) } } +int +string_desc_c_casecmp (string_desc_t a, string_desc_t b) +{ + /* Don't use memcasecmp here, since it uses the current locale, not the + "C" locale. */ + idx_t an = string_desc_length (a); + idx_t bn = string_desc_length (b); + const char *ap = string_desc_data (a); + const char *bp = string_desc_data (b); + idx_t n = (an < bn ? an : bn); + idx_t i; + for (i = 0; i < n; i++) + { + int ac = c_tolower ((unsigned char) ap[i]); + int bc = c_tolower ((unsigned char) bp[i]); + if (ac != bc) + return (UCHAR_MAX <= INT_MAX ? ac - bc : _GL_CMP (ac, bc)); + } + /* Here i = n = min (an, bn). */ + return _GL_CMP (an, bn); +} + ptrdiff_t string_desc_index (string_desc_t s, char c) { diff --git a/lib/string-desc.h b/lib/string-desc.h index ff9c86d6a0..0b8287121f 100644 --- a/lib/string-desc.h +++ b/lib/string-desc.h @@ -102,6 +102,12 @@ extern bool string_desc_endswith (string_desc_t s, string_desc_t suffix); 'unsigned char'. */ extern int string_desc_cmp (string_desc_t a, string_desc_t b); +/* Return > 0, == 0, or < 0 if A > B, A == B, A < B. + Either A or B must be entirely ASCII. + This uses a lexicographic ordering, where the bytes are compared as + 'unsigned char', ignoring case, in the "C" locale. 
*/ +extern int string_desc_c_casecmp (string_desc_t a, string_desc_t b); + /* Return the index of the first occurrence of C in S, or -1 if there is none. */ extern ptrdiff_t string_desc_index (string_desc_t s, char c); diff --git a/modules/string-desc b/modules/string-desc index 047e2d390c..e3f092dd09 100644 --- a/modules/string-desc +++ b/modules/string-desc @@ -9,6 +9,7 @@ lib/string-desc-contains.c Depends-on: stdbool idx +c-ctype ialloc extern-inline memchr diff --git a/tests/test-string-desc.c b/tests/test-string-desc.c index c39077c567..fbe538687b 100644 --- a/tests/test-string-desc.c +++ b/tests/test-string-desc.c @@ -94,6 +94,23 @@ main (int argc, char *argv[]) ASSERT (string_desc_cmp (s2, s1) > 0); ASSERT (string_desc_cmp (s2, s2) == 0); + /* Test string_desc_c_casecmp. */ + ASSERT (string_desc_c_casecmp (s0, s0) == 0); + ASSERT (string_desc_c_casecmp (s0, s1) < 0); + ASSERT (string_desc_c_casecmp (s0, s2) < 0); + ASSERT (string_desc_c_casecmp (s1, s0) > 0); + ASSERT (string_desc_c_casecmp (s1, s1) == 0); + ASSERT (string_desc_c_casecmp (s1, s2) < 0); + ASSERT (string_desc_c_casecmp (s2, s0) > 0); + ASSERT (string_desc_c_casecmp (s2, s1) > 0); + ASSERT (string_desc_c_casecmp (s2, s2) == 0); + ASSERT (string_desc_c_casecmp (string_desc_from_c ("acab"), string_desc_from_c ("AcAB")) == 0); + ASSERT (string_desc_c_casecmp (string_desc_from_c ("AcAB"), string_desc_from_c ("acab")) == 0); + ASSERT (string_desc_c_casecmp (string_desc_from_c ("aca"), string_desc_from_c ("AcAB")) < 0); + ASSERT (string_desc_c_casecmp (string_desc_from_c ("AcAB"), string_desc_from_c ("aca")) > 0); + ASSERT (string_desc_c_casecmp (string_desc_from_c ("aca"), string_desc_from_c ("Aca\377")) < 0); + ASSERT (string_desc_c_casecmp (string_desc_from_c ("Aca\377"), string_desc_from_c ("aca")) > 0); + /* Test string_desc_index. */ ASSERT (string_desc_index (s0, 'o') == -1); ASSERT (string_desc_index (s2, 'o') == 12);