From: David Bremner <[email protected]>

The initial intended use is quoting queries for Xapian. We want to
split the input into tokens, but preserve the delimiters between the
tokens verbatim.
---
 util/string-util.c |   12 ++++++++++++
 util/string-util.h |   19 +++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/util/string-util.c b/util/string-util.c
index b9039f4..1586483 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -34,6 +34,18 @@ strtok_len (char *s, const char *delim, size_t *len)
     return *len ? s : NULL;
 }

+char *
+strtok_len2 (char *s, const char *delim, size_t *len, size_t *delim_len)
+{
+    /* Token length: leading bytes of s that are not in delim. */
+    *len = strcspn (s, delim);
+
+    /* Length of the run of delimiter bytes right after the token. */
+    *delim_len = strspn (s + *len, delim);
+
+    return *len || *delim_len ? s : NULL; /* NULL only if both are 0 */
+}
+

 int
 double_quote_str (void *ctx, const char *str,
diff --git a/util/string-util.h b/util/string-util.h
index 4fc7942..12398a5 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -19,6 +19,25 @@

 char *strtok_len (char *s, const char *delim, size_t *len);

+/* Like strtok_len, but return length of delimiters as well.  Return
+ * value is indicated by pointer and length, not null terminator.
+ * Does _not_ skip initial delimiters.
+ *
+ * Usage pattern:
+ *
+ * const char *tok = input;
+ * const char *delim = " :.,";
+ * size_t tok_len = 0;
+ * size_t delim_len = 0;
+ *
+ * while ((tok = strtok_len2 (tok + tok_len + delim_len, delim,
+ *                            &tok_len, &delim_len)) != NULL) {
+ *     // do stuff with token and following delimiters.
+ * }
+ */
+
+char *strtok_len2 (char *s, const char *delim, size_t *len, size_t *delim_len);
+
 /* Copy str to dest, surrounding with double quotes.
  * Any internal double-quotes are doubled, i.e. a"b -> "a""b"
  *
-- 
1.7.10.4

Reply via email to