On Wed, Oct 16, 2024 at 11:09:32PM +0200, Jakub Jelinek wrote:
> Apparently my
> c: Speed up compilation of large char array initializers when not using #embed
> patch broke building glibc.
> 
> The issue is that when using CPP_EMBED, we are guaranteed by the
> preprocessor that there is CPP_NUMBER CPP_COMMA before it and
> CPP_COMMA CPP_NUMBER after it (or CPP_COMMA CPP_EMBED), so RAW_DATA_CST
> never ends up at the end of arrays of unknown length.
> Now, the c_parser_initval optimization attempted to preserve that property
> rather than changing everything that e.g. inferes array number of elements
> from the initializer etc. to deal with RAW_DATA_CST at the end, but
> it didn't take into account the possibility that there could be
> CPP_COMMA followed by CPP_CLOSE_BRACE (where the CPP_COMMA is redundant).
> 
> As we are peaking already at 4 tokens in that code, peeking more would
> require using raw tokens and that seems to be expensive doing it for
> every pair of tokens due to vec_free done when we are out of raw tokens.

Sorry for rushing the previous patch too much, turns out I was wrong,
given that the c_parser_peek_nth_token numbering is 1 based, we can peek
also with c_parser_peek_nth_token (parser, 4) and the loop actually peeked
just at 3 tokens, not 4.

So, I think it is better to revert the previous patch (but keep the new
test) and instead peek the 4th non-raw token, which is what the following
patch does.

Additionally, PR117190 shows one further spot which missed the peek of
the token after CPP_COMMA, in case it is incomplete array with exactly 65
elements with redundant comma after it, which this patch handles too.

Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux
and powerpc64-linux, ok for trunk?

2024-10-22  Jakub Jelinek  <ja...@redhat.com>

        PR c/117190
gcc/c/
        * c-parser.cc (c_parser_initval): Revert 2024-10-17 changes.
        Instead peek the 4th token and if it is not CPP_NUMBER,
        handle it like 3rd token CPP_CLOSE_BRACE for orig_len == INT_MAX.
        Also, check (2 + 2 * i)th raw token for the orig_len == INT_MAX
        case and punt if it is not CPP_NUMBER.
gcc/testsuite/
        * c-c++-common/init-5.c: New test.

--- gcc/c/c-parser.cc.jj        2024-10-17 07:08:32.651292504 +0200
+++ gcc/c/c-parser.cc   2024-10-17 22:35:42.725927078 +0200
@@ -6529,7 +6529,6 @@ c_parser_initval (c_parser *parser, stru
        unsigned int i;
        gcc_checking_assert (len >= 64);
        location_t last_loc = UNKNOWN_LOCATION;
-       location_t prev_loc = UNKNOWN_LOCATION;
        for (i = 0; i < 64; ++i)
          {
            c_token *tok = c_parser_peek_nth_token_raw (parser, 1 + 2 * i);
@@ -6545,7 +6544,6 @@ c_parser_initval (c_parser *parser, stru
            buf1[i] = (char) tree_to_uhwi (tok->value);
            if (i == 0)
              loc = tok->location;
-           prev_loc = last_loc;
            last_loc = tok->location;
          }
        if (i < 64)
@@ -6560,7 +6558,9 @@ c_parser_initval (c_parser *parser, stru
           (as guaranteed for CPP_EMBED).  */
        if (tok->type == CPP_CLOSE_BRACE && len != INT_MAX)
          len = i;
-       else if (tok->type != CPP_COMMA)
+       else if (tok->type != CPP_COMMA
+                || (c_parser_peek_nth_token_raw (parser, 2 + 2 * i)->type
+                    != CPP_NUMBER))
          {
            vals_to_ignore = i;
            return;
@@ -6569,7 +6569,6 @@ c_parser_initval (c_parser *parser, stru
        unsigned int max_len = 131072 - offsetof (struct tree_string, str) - 1;
        unsigned int orig_len = len;
        unsigned int off = 0, last = 0;
-       unsigned char lastc = 0;
        if (!wi::neg_p (wi::to_wide (val)) && wi::to_widest (val) <= UCHAR_MAX)
          off = 1;
        len = MIN (len, max_len - off);
@@ -6599,25 +6598,23 @@ c_parser_initval (c_parser *parser, stru
            if (tok2->type != CPP_COMMA && tok2->type != CPP_CLOSE_BRACE)
              break;
            buf2[i + off] = (char) tree_to_uhwi (tok->value);
-           prev_loc = last_loc;
+           /* If orig_len is INT_MAX, this can be flexible array member and
+              in that case we need to ensure another element which
+              for CPP_EMBED is normally guaranteed after it.  Include
+              that byte in the RAW_DATA_OWNER though, so it can be optimized
+              later.  */
+           if (orig_len == INT_MAX
+               && (tok2->type == CPP_CLOSE_BRACE
+                   || (c_parser_peek_nth_token (parser, 4)->type
+                       != CPP_NUMBER)))
+             {
+               last = 1;
+               break;
+             }
            last_loc = tok->location;
            c_parser_consume_token (parser);
            c_parser_consume_token (parser);
          }
-       /* If orig_len is INT_MAX, this can be flexible array member and
-          in that case we need to ensure another element which
-          for CPP_EMBED is normally guaranteed after it.  Include
-          that byte in the RAW_DATA_OWNER though, so it can be optimized
-          later.  */
-       if (orig_len == INT_MAX
-           && (!c_parser_next_token_is (parser, CPP_COMMA)
-               || c_parser_peek_2nd_token (parser)->type != CPP_NUMBER))
-         {
-           --i;
-           last = 1;
-           std::swap (prev_loc, last_loc);
-           lastc = (unsigned char) buf2[i + off];
-         }
        val = make_node (RAW_DATA_CST);
        TREE_TYPE (val) = integer_type_node;
        RAW_DATA_LENGTH (val) = i;
@@ -6633,13 +6630,6 @@ c_parser_initval (c_parser *parser, stru
        init.original_type = integer_type_node;
        init.m_decimal = 0;
        process_init_element (loc, init, false, braced_init_obstack);
-       if (last)
-         {
-           init.value = build_int_cst (integer_type_node, lastc);
-           init.original_code = INTEGER_CST;
-           set_c_expr_source_range (&init, prev_loc, prev_loc);
-           process_init_element (prev_loc, init, false, braced_init_obstack);
-         }
       }
 }
 
--- gcc/testsuite/c-c++-common/init-5.c.jj      2024-10-17 22:39:15.502873053 
+0200
+++ gcc/testsuite/c-c++-common/init-5.c 2024-10-17 22:41:36.753824025 +0200
@@ -0,0 +1,19 @@
+/* PR c/117190 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+struct S { char d[]; } v = {
+{ 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, }
+};
+
+int
+main ()
+{
+  for (int i = 0; i < 65; ++i)
+    if (v.d[i] != (i == 0 ? 8 : 0))
+      __builtin_abort ();
+}


        Jakub

Reply via email to