Author: mturk Date: Fri Aug 28 17:41:29 2009 New Revision: 808967 URL: http://svn.apache.org/viewvc?rev=808967&view=rev Log: Fix token counters
Modified: commons/sandbox/runtime/trunk/src/main/native/shared/string.c commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c Modified: commons/sandbox/runtime/trunk/src/main/native/shared/string.c URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/string.c?rev=808967&r1=808966&r2=808967&view=diff ============================================================================== --- commons/sandbox/runtime/trunk/src/main/native/shared/string.c (original) +++ commons/sandbox/runtime/trunk/src/main/native/shared/string.c Fri Aug 28 17:41:29 2009 @@ -944,6 +944,22 @@ return NULL; } +static char *strspc_q(const char *s1) +{ + int s0; + + /* Some early sanity check */ + if (!s1 || !*s1) + return NULL; + while ((s0 = *s1++) != 0) { + if (acr_isspace(s0)) + return (char *)(s1 - 1); + if (s0 == '\\' && *s1) + s1++; /* We have something escaped. Advance */ + } + return NULL; +} + static wchar_t *wcschr_q(const wchar_t *s1, int ch) { int s0; @@ -960,6 +976,22 @@ return NULL; } +static wchar_t *wcsspc_q(const wchar_t *s1) +{ + int s0; + + /* Some early sanity check */ + if (!s1 || !*s1) + return NULL; + while ((s0 = *s1++) != 0) { + if (iswspace(s0)) + return (wchar_t *)(s1 - 1); + if (s0 == L'\\' && *s1) + s1++; /* We have something escaped. 
Advance */ + } + return NULL; +} + ACR_DECLARE(char *) ACR_strctok(char *str, int sep, char **last) { char *tok; @@ -1028,6 +1060,7 @@ ACR_DECLARE(char *) ACR_strqtok(char *str, char **last) { + int chr; char *tok; if (!str) /* subsequent call */ @@ -1037,11 +1070,10 @@ while (*str && acr_isspace(*str)) /* skip leading delimiters */ str++; if (*str == '"') { - int ch; /* Advance to the first unescaped quote */ tok = str + 1; - while ((ch = *tok++) != 0) { - if (ch == '"') { + while ((chr = *tok++) != 0) { + if (chr == '"') { if (*tok) { *tok++ = '\0'; *last = tok; @@ -1050,7 +1082,7 @@ *last = NULL; return str; } - if (ch == '\\' && *tok) + if (chr == '\\' && *tok) tok++; } /* Unterminated quote */ @@ -1058,7 +1090,7 @@ /* Check for last empty token */ return *str ? str : NULL; } - if ((tok = strpbrk(str, " \t"))) { + if ((tok = strspc_q(str))) { *tok++ = '\0'; *last = tok; return str; @@ -1070,81 +1102,102 @@ } } -ACR_DECLARE(int) ACR_strnctok(const char *str, int sep) +static const char *_strcqctok(const char *str, int sep, const char **last) { - int cnt = 1; + const char *tok; - while (*str && *str == sep) /* skip leading delimiters */ - str++; - while (*str) { - if (*str == sep) { - while (*str == sep) - str++; - if (*str) - cnt++; + if (!str) /* subsequent call */ + str = *last; /* start where we left off */ + if (!str) /* no more tokens */ + return NULL; + while (*str == sep) /* skip leading delimiters */ + str++; + if (*str == '"') { + int ch; + /* Advance to the first unescaped quote */ + tok = str + 1; + while ((ch = *tok++) != 0) { + if (ch == '"') { + if (*tok) { + *last = ++tok; + } + else + *last = NULL; + return str; + } + if (ch == '\\' && *tok) + tok++; } - else - str++; + /* Unterminated quote */ + *last = NULL; + /* Check for last empty token */ + return *str ? str : NULL; + } + if ((tok = strchr_q(str, sep))) { + *last = ++tok; + return str; + } + else { + *last = NULL; + /* Check for last empty token */ + return *str ? 
str : NULL; } - return cnt; } -ACR_DECLARE(int) ACR_strnqctok(const char *str, int sep) +/* Same as ACR_strqtok, but doesn't modify the string. + * Used internally for token counting. + */ +static const char *_strcqtok(const char *str, const char **last) { - int cnt = 1; + int chr; + const char *tok; - while (*str == sep) /* skip leading delimiters */ - str++; - while (*str) { - if (*str == '"') { - int ch; - str++; - /* Advance to the first unescaped quote */ - while ((ch = *str++) != 0) { - if (ch == '"') { - cnt++; - break; + if (!str) /* subsequent call */ + str = *last; /* start where we left off */ + if (!str) /* no more tokens */ + return NULL; + while (*str && acr_isspace(*str)) /* skip leading delimiters */ + str++; + if (*str == '"') { + /* Advance to the first unescaped quote */ + tok = str + 1; + while ((chr = *tok++) != 0) { + if (chr == '"') { + if (*tok) { + *last = ++tok; } - if (ch == '\\' && *str) - str++; + else + *last = NULL; + return str; } - continue; - } - if (*str == sep) { - while (*str == sep) - str++; - if (*str) - cnt++; + if (chr == '\\' && *tok) + tok++; } - else - str++; + /* Unterminated quote */ + *last = NULL; + /* Check for last empty token */ + return *str ? str : NULL; + } + if ((tok = strspc_q(str))) { + *last = ++tok; + return str; + } + else { + *last = NULL; + /* Check for last empty token */ + return *str ? 
str : NULL; } - return cnt; } -ACR_DECLARE(int) ACR_strnqtok(const char *str) +ACR_DECLARE(int) ACR_strnctok(const char *str, int sep) { int cnt = 1; - while (*str && acr_isspace(*str)) /* skip leading delimiters */ + while (*str && *str == sep) /* skip leading delimiters */ str++; while (*str) { - if (*str == '"') { - int ch; - str++; - /* Advance to the first unescaped quote */ - while ((ch = *str++) != 0) { - if (ch == '"') { - cnt++; - break; - } - if (ch == '\\' && *str) - str++; - } - continue; - } - if (acr_isspace(*str)) { - while (acr_isspace(*str)) + if (*str == sep) { + while (*str == sep) str++; if (*str) cnt++; @@ -1155,21 +1208,30 @@ return cnt; } -ACR_DECLARE(int) ACR_wcsnctok(const wchar_t *str, int sep) +ACR_DECLARE(int) ACR_strnqctok(const char *str, int sep) { - int cnt = 1; + const char *token; + const char *state = NULL; + int cnt = 0; + + token = _strcqctok(str, sep, &state); + while (token) { + cnt++; + token = _strcqctok(NULL, sep, &state); + } + return cnt; +} - while (*str && *str == (wchar_t)sep) /* skip leading delimiters */ - str++; - while (*str) { - if (*str == (wchar_t)sep) { - while (*str == (wchar_t)sep) - str++; - if (*str) - cnt++; - } - else - str++; +ACR_DECLARE(int) ACR_strnqtok(const char *str) +{ + const char *token; + const char *state = NULL; + int cnt = 0; + + token = _strcqtok(str, &state); + while (token) { + cnt++; + token = _strcqtok(NULL, &state); } return cnt; } @@ -1196,39 +1258,6 @@ } } -ACR_DECLARE(int) ACR_wcsnqtok(const wchar_t *str) -{ - int cnt = 1; - - while (*str && iswspace(*str)) /* skip leading delimiters */ - str++; - while (*str) { - if (*str == L'"') { - wchar_t ch; - str++; - /* Advance to the first unescaped quote */ - while ((ch = *str++) != 0) { - if (ch == L'"') { - cnt++; - break; - } - if (ch == L'\\' && *str) - str++; - } - continue; - } - if (iswspace(*str)) { - while (iswspace(*str)) - str++; - if (*str) - cnt++; - } - else - str++; - } - return cnt; -} - ACR_DECLARE(wchar_t *) 
ACR_wcsqctok(wchar_t *str, int sep, wchar_t **last) { wchar_t *tok; @@ -1305,7 +1334,7 @@ /* Check for last empty token */ return *str ? str : NULL; } - if ((tok = wcspbrk(str, L" \t"))) { + if ((tok = wcsspc_q(str))) { *tok++ = L'\0'; *last = tok; return str; @@ -1317,6 +1346,137 @@ } } +static const wchar_t *_wcscqctok(const wchar_t *str, int sep, const wchar_t **last) +{ + const wchar_t *tok; + + if (!str) /* subsequent call */ + str = *last; /* start where we left off */ + if (!str) /* no more tokens */ + return NULL; + while (*str == sep) /* skip leading delimiters */ + str++; + if (*str == L'"') { + int ch; + /* Advance to the first unescaped quote */ + tok = str + 1; + while ((ch = *tok++) != 0) { + if (ch == L'"') { + if (*tok) { + *last = ++tok; + } + else + *last = NULL; + return str; + } + if (ch == L'\\' && *tok) + tok++; + } + /* Unterminated quote */ + *last = NULL; + /* Check for last empty token */ + return *str ? str : NULL; + } + if ((tok = wcschr_q(str, sep))) { + *last = ++tok; + return str; + } + else { + *last = NULL; + /* Check for last empty token */ + return *str ? str : NULL; + } +} + +static const wchar_t *_wcscqtok(const wchar_t *str, const wchar_t **last) +{ + const wchar_t *tok; + + if (!str) /* subsequent call */ + str = *last; /* start where we left off */ + if (!str) /* no more tokens */ + return NULL; + while (*str && acr_isspace(*str)) /* skip leading delimiters */ + str++; + if (*str == L'"') { + int ch; + /* Advance to the first unescaped quote */ + tok = str + 1; + while ((ch = *tok++) != 0) { + if (ch == L'"') { + if (*tok) { + *last = ++tok; + } + else + *last = NULL; + return str; + } + if (ch == L'\\' && *tok) + tok++; + } + /* Unterminated quote */ + *last = NULL; + /* Check for last empty token */ + return *str ? str : NULL; + } + if ((tok = wcsspc_q(str))) { + *last = ++tok; + return str; + } + else { + *last = NULL; + /* Check for last empty token */ + return *str ? 
str : NULL; + } +} + +ACR_DECLARE(int) ACR_wcsnctok(const wchar_t *str, int sep) +{ + int cnt = 1; + + while (*str && *str == (wchar_t)sep) /* skip leading delimiters */ + str++; + while (*str) { + if (*str == (wchar_t)sep) { + while (*str == (wchar_t)sep) + str++; + if (*str) + cnt++; + } + else + str++; + } + return cnt; +} + +ACR_DECLARE(int) ACR_wcsnqctok(const wchar_t *str, int sep) +{ + const wchar_t *token; + const wchar_t *state = NULL; + int cnt = 0; + + token = _wcscqctok(str, sep, &state); + while (token) { + cnt++; + token = _wcscqctok(NULL, sep, &state); + } + return cnt; +} + +ACR_DECLARE(int) ACR_wcsnqtok(const wchar_t *str) +{ + const wchar_t *token; + const wchar_t *state = NULL; + int cnt = 0; + + token = _wcscqtok(str, &state); + while (token) { + cnt++; + token = _wcscqtok(NULL, &state); + } + return cnt; +} + ACR_DECLARE(size_t) ACR_MszStrCountA(const char *s) { size_t n = 0; Modified: commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c?rev=808967&r1=808966&r2=808967&view=diff ============================================================================== --- commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c (original) +++ commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c Fri Aug 28 17:41:29 2009 @@ -362,6 +362,48 @@ return 0; } +static const char *test_strings[] = { + "The quick brown fox jumps over the lazy dog", + " The quick brown fox jumps over the lazy dog ", + "\tThe quick\t brown \tfox\t\tjumps \t\tover\t\t the \t\t lazy dog \t ", + "The \"quick brown fox\" jumps over the \"lazy dog\"", + "The \"quick brown fox\" jumps\\ over the \"lazy dog\"", + "The\\ \"quick brown fox\"\\ jumps\\ over\\ the\\ \"lazy dog\"", +#if 0 + "Lorem ipsum dolor sit amet, consectetur adipisicing elit," + "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
" + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris " + "nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in " + "reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla " + "pariatur. Excepteur sint occaecat cupidatat non proident, sunt in " + "culpa qui officia deserunt mollit anim id est laborum.", +#endif + NULL +}; + +static int test_strtok(int argc, const char *const argv[]) +{ + int i = 0; + char buf[2048]; + char *tok; + char *end; + + while (test_strings[i]) { + int c = 1; + strcpy(buf, test_strings[i]); + printf("\nTest %d %s\n", i, buf); + printf(" has %d tokens\n", ACR_strnqtok(buf)); + tok = ACR_strqtok(buf, &end); + while (tok) { + printf("Test %d.%d : '%s'\n", i, c++, tok); + tok = ACR_strqtok(NULL, &end); + } + i++; + } + return 0; +} + + int main(int argc, const char *const argv[]) { int rv = 0; @@ -433,6 +475,9 @@ else if (!strcasecmp(run_test, "ring")) { rv = test_ring(argc, argv); } + else if (!strcasecmp(run_test, "strtok")) { + rv = test_strtok(argc, argv); + } } cleanup: