david at tethera.net writes:

> From: David Bremner <bremner at debian.org>
>
> This is essentially cosmetic, since success=0 is promised by
> the comments in tag-utils.h.

In an amazing display of skill and style, in attempting to abort a git
send-email run so that I could rebase that last fixup away, I managed to
send the series without the cover letter. So, no point sending the whole
thing again just for that.

This ever-growing series obsoletes 

     id:1355492062-7546-1-git-send-email-david at tethera.net

I think I decided to ignore 

      id:871uettxln.fsf at qmul.ac.uk

Perhaps that should be documented, I'm not sure. Since the batch tagging
interface also allows you to remove strangely named tags, I think it is
ok.

Hopefully the new man page clears up what is and isn't allowed for
unencoded characters. Since spaces are the only thing used as
(top-level) delimiters now, they are the only thing "compressed" by
strtok_len.

For id:87vcc2q5n2.fsf at nikula.org, there is a seperate series
id:1355716788-2940-1-git-send-email-david at tethera.net to fix the memory
leak/allocation confusion. This series needs to be rebased onto the
final version of that one. I decided _not_ to relax the requirement that
the ':' marking a prefix be un-encoded, but rather update the
documentation.

A summary of changes to the series follows; I left out the interdiff for
the notmuch-tag man page, and things that got their own new commit.

----------------------------------------------------------------------
commit 30adcab7678296b22d86da06d472c3920c336747
Author: David Bremner <bremner at debian.org>
Date:   Sat Dec 15 15:17:40 2012 -0400

    fixup: clarify TAG_FLAG_ID_ONLY comments and name

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 112f2f3..1b66e76 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -208,7 +208,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char 
*argv[])
        if (input_format == DUMP_FORMAT_SUP) {
            ret = parse_sup_line (ctx, line, &query_string, tag_ops);
        } else {
-           ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | 
TAG_FLAG_ID_ONLY,
+           ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | 
TAG_FLAG_ID_DIRECT,
                                  &query_string, tag_ops);
        }

diff --git a/tag-util.c b/tag-util.c
index 8fea76c..37bffd5 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -201,7 +201,7 @@ parse_tag_line (void *ctx, char *line,
     }

     /* tok now points to the query string */
-    if (flags & TAG_FLAG_ID_ONLY) {
+    if (flags & TAG_FLAG_ID_DIRECT) {
        /* this is under the assumption that any whitespace in the
         * message-id must be hex-encoded. The check is probably not
         * perfect for exotic unicode whitespace; as fallback the
diff --git a/tag-util.h b/tag-util.h
index 7674051..eec00cf 100644
--- a/tag-util.h
+++ b/tag-util.h
@@ -28,8 +28,10 @@ typedef enum {
      */
     TAG_FLAG_BE_GENEROUS = (1 << 3),

-    /* Query consists of a single id:$message-id */
-    TAG_FLAG_ID_ONLY = (1 << 4)
+    /* Directly look up messages by hex-decoded message-id, rather
+     * than parsing a general query. The query MUST be of the form
+     * id:$message-id. */
+    TAG_FLAG_ID_DIRECT = (1 << 4)

 } tag_op_flag_t;


commit 256ec05a347b949e6b2bda0d2b6902ed8ab3fab3
Author: David Bremner <bremner at debian.org>
Date:   Sat Dec 15 19:54:05 2012 -0400

    fixup for id:87sj778ajb.fsf at qmul.ac.uk and id:87zk1fot39.fsf at 
nikula.org

diff --git a/notmuch-tag.c b/notmuch-tag.c
index 44b5bb4..5c8bad4 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -65,14 +65,16 @@ _optimize_tag_query (void *ctx, const char 
*orig_query_string,
     else
        query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);

-
     /* Boolean terms surrounded by double quotes can contain any
      * character.  Double quotes are quoted by doubling them. */

     for (i = 0; i < tag_op_list_size (list) && query_string; i++) {
-       double_quote_str (ctx,
-                         tag_op_list_tag (list, i),
-                         &escaped, &escaped_len);
+       /* XXX in case of OOM, query_string will be deallocated when
+        * ctx is, which might be at shutdown */
+       if (double_quote_str (ctx,
+                             tag_op_list_tag (list, i),
+                             &escaped, &escaped_len))
+           return NULL;

        query_string = talloc_asprintf_append_buffer (
            query_string, "%s%stag:%s", join,
diff --git a/util/string-util.c b/util/string-util.c
index ea7c25b..b9039f4 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -46,8 +46,11 @@ double_quote_str (void *ctx, const char *str,
     for (in = str; *in; in++)
        needed += (*in == '"') ? 2 : 1;

-    if (needed > *len)
-       *buf = talloc_realloc (ctx, *buf, char, 2*needed);
+    if ((*buf == NULL) || (needed > *len)) {
+       *len = 2 * needed;
+       *buf = talloc_realloc (ctx, *buf, char, *len);
+    }
+

     if (! *buf)
        return 1;
@@ -62,7 +65,7 @@ double_quote_str (void *ctx, const char *str,
        *out++ = *in++;
     }
     *out++ = '"';
-    *out = 0;
+    *out = '\0';

     return 0;
 }
diff --git a/util/string-util.h b/util/string-util.h
index b593bc7..4fc7942 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -23,7 +23,7 @@ char *strtok_len (char *s, const char *delim, size_t *len);
  * Any internal double-quotes are doubled, i.e. a"b -> "a""b"
  *
  * Output is into buf; it may be talloc_realloced
- * return 0 on success, non-zero on failure.
+ * Return: 0 on success, non-zero on memory allocation failure.
  */
 int double_quote_str (void *talloc_ctx, const char *str,
                      char **buf, size_t *len);

commit 4b26ac6f99c74cced64cfae317e6ac4b6a8f706f
Author: David Bremner <bremner at debian.org>
Date:   Mon Dec 17 23:36:30 2012 -0400

    fixup: rewrite decode+quote function for queries.

    Rather than splitting on ':' at the top level, we examine each space
    delimited token for a prefix.

diff --git a/tag-util.c b/tag-util.c
index 37bffd5..b0a846b 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -56,9 +56,14 @@ illegal_tag (const char *tag, notmuch_bool_t remove)
     return NULL;
 }

+/* Input is a hex encoded string, presumed to be a query for Xapian.
+ *
+ * Space delimited tokens are decoded and quoted, with '*' and prefixes
+ * of the form "foo:" passed through unquoted.
+ */
 static tag_parse_status_t
-quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
-                       char **query_string)
+unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
+                char **query_string)
 {
     char *tok = encoded;
     size_t tok_len = 0;
@@ -68,14 +73,43 @@ quote_and_decode_query (void *ctx, char *encoded, const 
char *line_for_error,

     *query_string = talloc_strdup (ctx, "");

-    while (*query_string &&
-          (tok = strtok_len (tok + tok_len, ": ", &tok_len)) != NULL) {
-       char delim = tok[tok_len];
+    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
+
+       size_t prefix_len;
+       char delim = *(tok + tok_len);

        *(tok + tok_len++) = '\0';

-       if (strcspn (tok, "%") < tok_len - 1) {
-           /* something to decode */
+       prefix_len = hex_invariant (tok, tok_len);
+
+       if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) {
+
+           /* pass some things through without quoting or decoding.
+            * Note for '*' this is mandatory.
+            */
+
+           if (! (*query_string = talloc_asprintf_append_buffer (
+                      *query_string, "%s%c", tok, delim))) {
+
+               ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+                                 line_for_error, "aborting");
+               goto DONE;
+           }
+
+       } else {
+           /* potential prefix: one for ':', then something after */
+           if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') {
+               if (! (*query_string = talloc_strndup_append (*query_string,
+                                                             tok,
+                                                             prefix_len + 1))) 
{
+                   ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+                                     line_for_error, "aborting");
+                   goto DONE;
+               }
+               tok += prefix_len + 1;
+               tok_len -= prefix_len + 1;
+           }
+
            if (hex_decode_inplace (tok) != HEX_SUCCESS) {
                ret = line_error (TAG_PARSE_INVALID, line_for_error,
                                  "hex decoding of token '%s' failed", tok);
@@ -87,17 +121,14 @@ quote_and_decode_query (void *ctx, char *encoded, const 
char *line_for_error,
                                  line_for_error, "aborting");
                goto DONE;
            }
-           *query_string = talloc_asprintf_append_buffer (
-               *query_string, "%s%c", buf, delim);

-       } else {
-           /* This is not just an optimization, but used to preserve
-            * prefixes like id:, which cannot be quoted.
-            */
-           *query_string = talloc_asprintf_append_buffer (
-               *query_string, "%s%c", tok, delim);
+           if (! (*query_string = talloc_asprintf_append_buffer (
+                      *query_string, "%s%c", buf, delim))) {
+               ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+                                 line_for_error, "aborting");
+               goto DONE;
+           }
        }
-
     }

   DONE:
@@ -220,7 +251,7 @@ parse_tag_line (void *ctx, char *line,
        /* skip 'id:' */
        *query_string = tok + 3;
     } else {
-       ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
+       ret = unhex_and_quote (ctx, tok, line_for_error, query_string);
     }

   DONE:



Reply via email to