https://gcc.gnu.org/g:96ba5e5663d4390a7e69735ce3c9de657fc543fc
commit r15-4402-g96ba5e5663d4390a7e69735ce3c9de657fc543fc Author: Jakub Jelinek <ja...@redhat.com> Date: Thu Oct 17 06:59:31 2024 +0200 c: Fix up speed up compilation of large char array initializers when not using #embed [PR117177] Apparently my "c: Speed up compilation of large char array initializers when not using #embed" patch broke building glibc. The issue is that when using CPP_EMBED, we are guaranteed by the preprocessor that there is CPP_NUMBER CPP_COMMA before it and CPP_COMMA CPP_NUMBER after it (or CPP_COMMA CPP_EMBED), so RAW_DATA_CST never ends up at the end of arrays of unknown length. Now, the c_parser_initval optimization attempted to preserve that property rather than changing everything that e.g. infers array number of elements from the initializer etc. to deal with RAW_DATA_CST at the end, but it didn't take into account the possibility that there could be CPP_COMMA followed by CPP_CLOSE_BRACE (where the CPP_COMMA is redundant). As we are peeking already at 4 tokens in that code, peeking more would require using raw tokens and that seems to be expensive doing it for every pair of tokens due to vec_free done when we are out of raw tokens. So, the following patch instead determines the case where we want another INTEGER_CST element after it after consuming the tokens, and just arranges for another process_init_element. 2024-10-17 Jakub Jelinek <ja...@redhat.com> PR c/117177 gcc/c/ * c-parser.cc (c_parser_initval): Instead of doing orig_len == INT_MAX checks before consuming tokens to set last = 1, check it after consuming it and if not followed by CPP_COMMA CPP_NUMBER, call process_init_element once more with the last CPP_NUMBER. gcc/testsuite/ * c-c++-common/init-4.c: New test. 
Diff: --- gcc/c/c-parser.cc | 35 +++++++++---- gcc/testsuite/c-c++-common/init-4.c | 97 +++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 10 deletions(-) diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index e4381044e5cb..090ab1cbc088 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -6529,6 +6529,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after, unsigned int i; gcc_checking_assert (len >= 64); location_t last_loc = UNKNOWN_LOCATION; + location_t prev_loc = UNKNOWN_LOCATION; for (i = 0; i < 64; ++i) { c_token *tok = c_parser_peek_nth_token_raw (parser, 1 + 2 * i); @@ -6544,6 +6545,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after, buf1[i] = (char) tree_to_uhwi (tok->value); if (i == 0) loc = tok->location; + prev_loc = last_loc; last_loc = tok->location; } if (i < 64) @@ -6567,6 +6569,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after, unsigned int max_len = 131072 - offsetof (struct tree_string, str) - 1; unsigned int orig_len = len; unsigned int off = 0, last = 0; + unsigned char lastc = 0; if (!wi::neg_p (wi::to_wide (val)) && wi::to_widest (val) <= UCHAR_MAX) off = 1; len = MIN (len, max_len - off); @@ -6596,20 +6599,25 @@ c_parser_initval (c_parser *parser, struct c_expr *after, if (tok2->type != CPP_COMMA && tok2->type != CPP_CLOSE_BRACE) break; buf2[i + off] = (char) tree_to_uhwi (tok->value); - /* If orig_len is INT_MAX, this can be flexible array member and - in that case we need to ensure another element which - for CPP_EMBED is normally guaranteed after it. Include - that byte in the RAW_DATA_OWNER though, so it can be optimized - later. 
*/ - if (tok2->type == CPP_CLOSE_BRACE && orig_len == INT_MAX) - { - last = 1; - break; - } + prev_loc = last_loc; last_loc = tok->location; c_parser_consume_token (parser); c_parser_consume_token (parser); } + /* If orig_len is INT_MAX, this can be flexible array member and + in that case we need to ensure another element which + for CPP_EMBED is normally guaranteed after it. Include + that byte in the RAW_DATA_OWNER though, so it can be optimized + later. */ + if (orig_len == INT_MAX + && (!c_parser_next_token_is (parser, CPP_COMMA) + || c_parser_peek_2nd_token (parser)->type != CPP_NUMBER)) + { + --i; + last = 1; + std::swap (prev_loc, last_loc); + lastc = (unsigned char) buf2[i + off]; + } val = make_node (RAW_DATA_CST); TREE_TYPE (val) = integer_type_node; RAW_DATA_LENGTH (val) = i; @@ -6625,6 +6633,13 @@ c_parser_initval (c_parser *parser, struct c_expr *after, init.original_type = integer_type_node; init.m_decimal = 0; process_init_element (loc, init, false, braced_init_obstack); + if (last) + { + init.value = build_int_cst (integer_type_node, lastc); + init.original_code = INTEGER_CST; + set_c_expr_source_range (&init, prev_loc, prev_loc); + process_init_element (prev_loc, init, false, braced_init_obstack); + } } } diff --git a/gcc/testsuite/c-c++-common/init-4.c b/gcc/testsuite/c-c++-common/init-4.c new file mode 100644 index 000000000000..d575a3f5a513 --- /dev/null +++ b/gcc/testsuite/c-c++-common/init-4.c @@ -0,0 +1,97 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +unsigned char a1[] = { + 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, + 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, + 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, + 0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, + 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, + 0xfa, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; +unsigned 
char a2[] = { + 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, + 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, + 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, + 0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, + 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00 +}; +unsigned char a3[] = { + 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, + 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, + 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, + 0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, + 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, +}; +unsigned char a4[] = { + 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, + 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, + 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, + 0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, + 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, + 0xfa +}; +unsigned char a5[] = { + 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, + 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, + 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, + 0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, + 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, + 0xfa, +}; +unsigned char a6[] = { + 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, + 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, + 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, + 0x00, 
0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, + 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, + 0xfa, 0xfb +}; +unsigned char a7[] = { + 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, + 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, + 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, + 0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, + 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, + 0xfa, 0xfb +}; + +int +main () +{ + if (sizeof (a1) != 72 + || sizeof (a2) != 64 + || __builtin_memcmp (a1, a2, 64) != 0 + || sizeof (a3) != 64 + || __builtin_memcmp (a1, a3, 64) != 0 + || sizeof (a4) != 65 + || __builtin_memcmp (a1, a4, 65) != 0 + || sizeof (a5) != 65 + || __builtin_memcmp (a1, a5, 65) != 0 + || sizeof (a6) != 66 + || __builtin_memcmp (a1, a6, 66) != 0 + || sizeof (a7) != 66 + || __builtin_memcmp (a1, a7, 66) != 0) + __builtin_abort (); +}