Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

Martin Sebor Wed, 03 May 2017 14:21:06 -0700

On 05/03/2017 01:59 PM, Joseph Myers wrote:

On Wed, 3 May 2017, Martin Sebor wrote:

Use contrib/config-list.mk, with a native compiler with this patch in the
PATH, to test building compilers for many configurations.  (No doubt
you'll also find existing build issues, which may or may not be filed in
Bugzilla.)


I can do some of it but not all of it.  The work doesn't involve
just building the compiler but also running the tests and fixing
up regressions in those that are written to expect the unquoted
diagnostics.  I don't have the ability to run the test suite on
all the targets, or the bandwidth to run it on a subset of them
beyond powerpc64 and x86_64.  So I'm hoping to find a way for
the core of the patch to be checked in and for the cleanup to
proceed subsequently, as target maintainers find time.


When it's architecture-specific tests, I think it's up to people testing
on those architectures to fix them.

diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
index 13ca8ea..8932861 100644
--- a/gcc/c-family/c-format.h
+++ b/gcc/c-family/c-format.h
@@ -132,6 +132,11 @@ struct format_type_detail
 struct format_char_info
 {
   const char *format_chars;
+  /* Strings of FORMAT_CHARS characters that begin and end quoting,
+     respectively, and pairs of which should be matched in the same
+     format string.  NULL when no such pairs exist.  */
+  const char *quote_begin_chars;
+  const char *quote_end_chars;


I don't think this comment is sufficiently detailed to make it obvious
what should appear in each field for each of the four kinds of format
specifiers I enumerated.  The best I can reverse-engineer from the code
is: NULL for specifiers that may be used either quoted or unquoted *or*
listing those specifiers in both quote_begin_chars and quote_end_chars
(but I think the option of listing them in both should be removed as
conceptually wrong); if the character is an opening or closing quote, list
it in the appropriate one; if it must be used quoted, but isn't a quote
itself, both strings must be non-NULL but it must not be listed in either.

Whether that's right or wrong, the comment needs to make the rules clear.


I've clarified the comment.


Clarifying the comment is helpful, but a data structure involving putting
the same character in both still doesn't make sense to me.  It would seem
a lot clearer to (for example) split "DFKTEV" into separate "DFTV" and
"EK" cases, where "EK" uses NULL there just like "s", "d" etc. do.


Then the begin/end strings for the "DFTV" entry will be the empty
string (to indicate that they are expected to be quoted), as in
the attached incremental diff.  Let me know if I misunderstood
and you had something else in mind.

FWIW, I don't mind doing this way if you prefer, but I'm hard
pressed to see the improvement.  All it did is grow the size of
the tables.  The code stayed the same.

Martin

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index f64b6e0..ad56835 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -688,7 +688,8 @@ static const format_char_info gcc_diag_char_table[] =
   { "K", NULL, NULL,   0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",    "",   NULL },
 
   { "r", NULL, NULL,   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "cR",   NULL },
-  { "<>'R","<'R", ">'R", 0, STD_C89, NOARGUMENTS, "",      "",   NULL },
+  { "<>","<", ">",     0, STD_C89, NOARGUMENTS, "",      "",   NULL },
+  { "'R", NULL, NULL,  0, STD_C89, NOARGUMENTS, "",      "",   NULL },
   { "m", NULL, NULL,   0, STD_C89, NOARGUMENTS, "q",     "",   NULL },
   { NULL, NULL, NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
 };
@@ -706,12 +707,13 @@ static const format_char_info gcc_tdiag_char_table[] =
   /* Custom conversion specifiers.  */
 
   /* These will require a "tree" at runtime.  */
-  { "DFKTEV", "EK", "EK", 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
-
+  { "DFTV", "", "", 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
+  { "EK", NULL, NULL, 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
   { "v", NULL, NULL,   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
 
   { "r", NULL, NULL,   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "cR",   NULL },
-  { "<>'R", "<'R", ">'R", 0, STD_C89, NOARGUMENTS, "",      "",   NULL },
+  { "<>", "<", ">",    0, STD_C89, NOARGUMENTS, "",      "",   NULL },
+  { "'R", NULL, NULL,  0, STD_C89, NOARGUMENTS, "",      "",   NULL },
   { "m", NULL, NULL,   0, STD_C89, NOARGUMENTS, "q",     "",   NULL },
   { "Z", NULL, NULL,   1, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "", &gcc_tdiag_char_table[0] },
   { NULL, NULL, NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
@@ -730,12 +732,14 @@ static const format_char_info gcc_cdiag_char_table[] =
   /* Custom conversion specifiers.  */
 
   /* These will require a "tree" at runtime.  */
-  { "DEFKTV", "EK", "EK", 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
+  { "DFTV", "", "", 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
+  { "EK", NULL, NULL, 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
 
   { "v", NULL, NULL,   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
 
   { "r", NULL, NULL,   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "cR",   NULL },
-  { "<>'R", "<'R", ">'R", 0, STD_C89, NOARGUMENTS, "",      "",   NULL },
+  { "<>", "<", ">",    0, STD_C89, NOARGUMENTS, "",      "",   NULL },
+  { "'R", NULL, NULL,  0, STD_C89, NOARGUMENTS, "",      "",   NULL },
   { "m", NULL, NULL,   0, STD_C89, NOARGUMENTS, "q",     "",   NULL },
   { "Z", NULL, NULL,   1, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "", &gcc_tdiag_char_table[0] },
   { NULL, NULL, NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
@@ -754,7 +758,8 @@ static const format_char_info gcc_cxxdiag_char_table[] =
   /* Custom conversion specifiers.  */
 
   /* These will require a "tree" at runtime.  */
-  { "ADEFKSTVX", "EK", "EK", 0,STD_C89,{ T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "",   NULL },
+  { "ADFSTVX", "", "", 0,STD_C89,{ T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "",   NULL },
+  { "EK", NULL, NULL, 0,STD_C89,{ T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+#",   "",   NULL },
 
   { "v", NULL, NULL, 0,STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
 
@@ -762,7 +767,8 @@ static const format_char_info gcc_cxxdiag_char_table[] =
   { "CLOPQ", NULL, NULL, 0,STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "",   NULL },
 
   { "r", NULL, NULL,   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "cR",   NULL },
-  { "<>'R", "<'R", ">'R", 0, STD_C89, NOARGUMENTS, "",      "",   NULL },
+  { "<>", "<", ">",    0, STD_C89, NOARGUMENTS, "",      "",   NULL },
+  { "'R", NULL, NULL,  0, STD_C89, NOARGUMENTS, "",      "",   NULL },
   { "m", NULL, NULL,   0, STD_C89, NOARGUMENTS, "q",     "",   NULL },
   { "Z", NULL, NULL,   1, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "", &gcc_tdiag_char_table[0] },
   { NULL, NULL, NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
@@ -2829,9 +2835,7 @@ check_format_info_main (format_check_results *res,
       const bool suppressed = flag_chars.assignment_suppression_p (fki);
 
       /* Diagnose nested or unmatched quoting directives such as GCC's
-	 "%<...%<" and "%>...%>".  As a special case, a conversion
-	 specifier that appears in both the BEGIN and END arrays (e.g.,
-	 GCC's %E) is not diagosed.  */
+	 "%<...%<" and "%>...%>"..  */
       bool in_begin_p = (fci->quote_begin_chars
 			  && strchr (fci->quote_begin_chars, format_char));
       bool in_end_p = (fci->quote_end_chars
diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
index 75ba117..0cfa9ef 100644
--- a/gcc/c-family/c-format.h
+++ b/gcc/c-family/c-format.h
@@ -137,9 +137,7 @@ struct format_char_info
      pairs of which should be matched in the same format string.
      Alternatively, strings of conversion specifiers that are expected
      to be used within quoted sequences (either by using the 'q' flag or
-     by being preceded by the "%<" directive).  A specifier that appears
-     in both may be used unquoted outside a quoted sequence (such as in
-     GCC's "%E").  The pointers may be NULL.  */
+     by being preceded by the "%<" directive).  The pointers may be NULL.  */
   const char *quote_begin_chars;
   const char *quote_end_chars;
   int pointer_count;

Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

Reply via email to