https://gcc.gnu.org/g:96ba5e5663d4390a7e69735ce3c9de657fc543fc

commit r15-4402-g96ba5e5663d4390a7e69735ce3c9de657fc543fc
Author: Jakub Jelinek <ja...@redhat.com>
Date:   Thu Oct 17 06:59:31 2024 +0200

    c: Fix up speed up compilation of large char array initializers when not using #embed [PR117177]
    
    Apparently my
    c: Speed up compilation of large char array initializers when not using #embed
    patch broke building glibc.
    
    The issue is that when using CPP_EMBED, we are guaranteed by the
    preprocessor that there is CPP_NUMBER CPP_COMMA before it and
    CPP_COMMA CPP_NUMBER after it (or CPP_COMMA CPP_EMBED), so RAW_DATA_CST
    never ends up at the end of arrays of unknown length.
    Now, the c_parser_initval optimization attempted to preserve that property
    rather than changing everything that e.g. infers the number of array
    elements from the initializer etc. to deal with RAW_DATA_CST at the end,
    but it didn't take into account the possibility that there could be
    CPP_COMMA followed by CPP_CLOSE_BRACE (where the CPP_COMMA is redundant).
    
    As we are already peeking at 4 tokens in that code, peeking more would
    require using raw tokens, and doing that for every pair of tokens seems
    to be expensive due to the vec_free done when we are out of raw tokens.
    
    So, the following patch instead determines, after consuming the tokens,
    whether another INTEGER_CST element is needed after the RAW_DATA_CST, and
    if so just arranges for another process_init_element call.
    
    2024-10-17  Jakub Jelinek  <ja...@redhat.com>
    
            PR c/117177
    gcc/c/
            * c-parser.cc (c_parser_initval): Instead of doing
            orig_len == INT_MAX checks before consuming tokens to set
            last = 1, check it after consuming it and if not followed
            by CPP_COMMA CPP_NUMBER, call process_init_element once
            more with the last CPP_NUMBER.
    gcc/testsuite/
            * c-c++-common/init-4.c: New test.

Diff:
---
 gcc/c/c-parser.cc                   | 35 +++++++++----
 gcc/testsuite/c-c++-common/init-4.c | 97 +++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index e4381044e5cb..090ab1cbc088 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -6529,6 +6529,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after,
        unsigned int i;
        gcc_checking_assert (len >= 64);
        location_t last_loc = UNKNOWN_LOCATION;
+       location_t prev_loc = UNKNOWN_LOCATION;
        for (i = 0; i < 64; ++i)
          {
            c_token *tok = c_parser_peek_nth_token_raw (parser, 1 + 2 * i);
@@ -6544,6 +6545,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after,
            buf1[i] = (char) tree_to_uhwi (tok->value);
            if (i == 0)
              loc = tok->location;
+           prev_loc = last_loc;
            last_loc = tok->location;
          }
        if (i < 64)
@@ -6567,6 +6569,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after,
        unsigned int max_len = 131072 - offsetof (struct tree_string, str) - 1;
        unsigned int orig_len = len;
        unsigned int off = 0, last = 0;
+       unsigned char lastc = 0;
        if (!wi::neg_p (wi::to_wide (val)) && wi::to_widest (val) <= UCHAR_MAX)
          off = 1;
        len = MIN (len, max_len - off);
@@ -6596,20 +6599,25 @@ c_parser_initval (c_parser *parser, struct c_expr *after,
            if (tok2->type != CPP_COMMA && tok2->type != CPP_CLOSE_BRACE)
              break;
            buf2[i + off] = (char) tree_to_uhwi (tok->value);
-           /* If orig_len is INT_MAX, this can be flexible array member and
-              in that case we need to ensure another element which
-              for CPP_EMBED is normally guaranteed after it.  Include
-              that byte in the RAW_DATA_OWNER though, so it can be optimized
-              later.  */
-           if (tok2->type == CPP_CLOSE_BRACE && orig_len == INT_MAX)
-             {
-               last = 1;
-               break;
-             }
+           prev_loc = last_loc;
            last_loc = tok->location;
            c_parser_consume_token (parser);
            c_parser_consume_token (parser);
          }
+       /* If orig_len is INT_MAX, this can be flexible array member and
+          in that case we need to ensure another element which
+          for CPP_EMBED is normally guaranteed after it.  Include
+          that byte in the RAW_DATA_OWNER though, so it can be optimized
+          later.  */
+       if (orig_len == INT_MAX
+           && (!c_parser_next_token_is (parser, CPP_COMMA)
+               || c_parser_peek_2nd_token (parser)->type != CPP_NUMBER))
+         {
+           --i;
+           last = 1;
+           std::swap (prev_loc, last_loc);
+           lastc = (unsigned char) buf2[i + off];
+         }
        val = make_node (RAW_DATA_CST);
        TREE_TYPE (val) = integer_type_node;
        RAW_DATA_LENGTH (val) = i;
@@ -6625,6 +6633,13 @@ c_parser_initval (c_parser *parser, struct c_expr *after,
        init.original_type = integer_type_node;
        init.m_decimal = 0;
        process_init_element (loc, init, false, braced_init_obstack);
+       if (last)
+         {
+           init.value = build_int_cst (integer_type_node, lastc);
+           init.original_code = INTEGER_CST;
+           set_c_expr_source_range (&init, prev_loc, prev_loc);
+           process_init_element (prev_loc, init, false, braced_init_obstack);
+         }
       }
 }
 
diff --git a/gcc/testsuite/c-c++-common/init-4.c b/gcc/testsuite/c-c++-common/init-4.c
new file mode 100644
index 000000000000..d575a3f5a513
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/init-4.c
@@ -0,0 +1,97 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+unsigned char a1[] = {
+  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7,
+  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
+  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef,
+  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7,
+  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff,
+  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00,
+  0xfa, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+unsigned char a2[] = {
+  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7,
+  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
+  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef,
+  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7,
+  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff,
+  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00
+};
+unsigned char a3[] = {
+  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7,
+  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
+  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef,
+  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7,
+  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff,
+  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00,
+};
+unsigned char a4[] = {
+  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7,
+  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
+  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef,
+  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7,
+  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff,
+  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00,
+  0xfa
+};
+unsigned char a5[] = {
+  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7,
+  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
+  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef,
+  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7,
+  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff,
+  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00,
+  0xfa,
+};
+unsigned char a6[] = {
+  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7,
+  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
+  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef,
+  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7,
+  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff,
+  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00,
+  0xfa, 0xfb
+};
+unsigned char a7[] = {
+  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7,
+  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
+  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef,
+  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7,
+  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff,
+  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00,
+  0xfa, 0xfb,
+};
+
+int
+main ()
+{
+  if (sizeof (a1) != 72
+      || sizeof (a2) != 64
+      || __builtin_memcmp (a1, a2, 64) != 0
+      || sizeof (a3) != 64
+      || __builtin_memcmp (a1, a3, 64) != 0
+      || sizeof (a4) != 65
+      || __builtin_memcmp (a1, a4, 65) != 0
+      || sizeof (a5) != 65
+      || __builtin_memcmp (a1, a5, 65) != 0
+      || sizeof (a6) != 66
+      || __builtin_memcmp (a1, a6, 66) != 0
+      || sizeof (a7) != 66
+      || __builtin_memcmp (a1, a7, 66) != 0)
+    __builtin_abort ();
+}

Reply via email to