Hi again,

On 07/01/2015 10:54 PM, Paolo Carlini wrote:
Hi,

On 07/01/2015 10:45 PM, Andreas Schwab wrote:
Paolo Carlini <paolo.carl...@oracle.com> writes:

@@ -972,21 +972,20 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppcha
     or 0060 (`), nor one in the range D800 through DFFF inclusive.
       *PSTR must be preceded by "\u" or "\U"; it is assumed that the
-   buffer end is delimited by a non-hex digit.  Returns zero if the
-   UCN has not been consumed.
+   buffer end is delimited by a non-hex digit.  Returns one if the
+   UCN has not been consumed, zero otherwise.
The name of the function would make more sense if it returned a boolean
with true meaning success.
To be clear (the diff can be a little confusing) we are talking about _cpp_valid_ucn, the name of the function I'm patching. That said, I'm of course open to proposals about a better name for the patched function. Likewise for the values returned, which I picked for consistency with all the other functions in libcpp, the usual "unix" convention.
I stand corrected: in fact we are already using a mix of bool and int return types in those functions. Thus I'm also testing the below version, which simply changes the return type to bool with true meaning success.

Thanks!
Paolo.

///////////////////
/libcpp
2015-07-01  Paolo Carlini  <paolo.carl...@oracle.com>

        PR c++/53690
        * charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change
        return type to bool.  Fix encoding of \u0000 and \U00000000 in C++.
        (convert_ucn): Adjust call.
        * lex.c (forms_identifier_p): Likewise.
        * internal.h (_cpp_valid_ucn): Adjust declaration.

/gcc/testsuite
2015-07-01  Paolo Carlini  <paolo.carl...@oracle.com>

        PR c++/53690
        * g++.dg/cpp/pr53690.C: New.
Index: gcc/testsuite/g++.dg/cpp/pr53690.C
===================================================================
--- gcc/testsuite/g++.dg/cpp/pr53690.C  (revision 0)
+++ gcc/testsuite/g++.dg/cpp/pr53690.C  (working copy)
@@ -0,0 +1,7 @@
+// PR c++/53690
+// { dg-do compile { target c++11 } }
+
+int array1[U'\U00000000' == 0 ? 1 : -1];
+int array2[U'\u0000' == 0 ? 1 : -1];
+int array3[u'\U00000000' == 0 ? 1 : -1];
+int array4[u'\u0000' == 0 ? 1 : -1];
Index: libcpp/charset.c
===================================================================
--- libcpp/charset.c    (revision 225275)
+++ libcpp/charset.c    (working copy)
@@ -972,21 +972,20 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppcha
    or 0060 (`), nor one in the range D800 through DFFF inclusive.
 
    *PSTR must be preceded by "\u" or "\U"; it is assumed that the
-   buffer end is delimited by a non-hex digit.  Returns zero if the
-   UCN has not been consumed.
+   buffer end is delimited by a non-hex digit.  Returns false if the
+   UCN has not been consumed, true otherwise.
 
-   Otherwise the nonzero value of the UCN, whether valid or invalid,
-   is returned.  Diagnostics are emitted for invalid values.  PSTR
-   is updated to point one beyond the UCN, or to the syntactically
-   invalid character.
+   The value of the UCN, whether valid or invalid, is returned in *CP.
+   Diagnostics are emitted for invalid values.  PSTR is updated to point
+   one beyond the UCN, or to the syntactically invalid character.
 
    IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
    an identifier, or 2 otherwise.  */
 
-cppchar_t
+bool
 _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
                const uchar *limit, int identifier_pos,
-               struct normalize_state *nst)
+               struct normalize_state *nst, cppchar_t *cp)
 {
   cppchar_t result, c;
   unsigned int length;
@@ -1030,8 +1029,11 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **p
      multiple tokens in identifiers, so we can't give a helpful
      error message in that case.  */
   if (length && identifier_pos)
-    return 0;
-  
+    {
+      *cp = 0;
+      return false;
+    }
+
   *pstr = str;
   if (length)
     {
@@ -1079,10 +1081,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **p
                   (int) (str - base), base);
     }
 
-  if (result == 0)
-    result = 1;
-
-  return result;
+  *cp = result;
+  return true;
 }
 
 /* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
@@ -1100,7 +1100,7 @@ convert_ucn (cpp_reader *pfile, const uchar *from,
   struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 
   from++;  /* Skip u/U.  */
-  ucn = _cpp_valid_ucn (pfile, &from, limit, 0, &nst);
+  _cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn);
 
   rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
   if (rval)
Index: libcpp/internal.h
===================================================================
--- libcpp/internal.h   (revision 225275)
+++ libcpp/internal.h   (working copy)
@@ -744,9 +744,10 @@ struct normalize_state
 #define NORMALIZE_STATE_UPDATE_IDNUM(st, c)    \
   ((st)->previous = (c), (st)->prev_class = 0)
 
-extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **,
-                                const unsigned char *, int,
-                                struct normalize_state *state);
+extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
+                           const unsigned char *, int,
+                           struct normalize_state *state,
+                           cppchar_t *);
 extern void _cpp_destroy_iconv (cpp_reader *);
 extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
                                          unsigned char *, size_t, size_t,
Index: libcpp/lex.c
===================================================================
--- libcpp/lex.c        (revision 225275)
+++ libcpp/lex.c        (working copy)
@@ -1244,9 +1244,10 @@ forms_identifier_p (cpp_reader *pfile, int first,
       && *buffer->cur == '\\'
       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
     {
+      cppchar_t s;
       buffer->cur += 2;
       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
-                         state))
+                         state, &s))
        return true;
       buffer->cur -= 2;
     }

Reply via email to