The middle-end contains code to determine the lengths of constant
character arrays initialized by string literals. The code is used
in a number of optimizations and warnings.
However, the code is unable to deal with constant arrays initialized
using the braced initializer syntax, as in
const char a[] = { '1', '2', '\0' };
The attached patch extends the C and C++ front-ends to convert such
initializers into a STRING_CST form.
The goal of this work is to both enable existing optimizations for
such arrays, and to help detect bugs due to using non-nul terminated
arrays where nul-terminated strings are expected. The latter is
an extension of the GCC 8 -Wstringop-overflow and
-Wstringop-truncation warnings that help detect or prevent reading
past the end of dynamically created character arrays. Future work
includes detecting potential past-the-end reads from uninitialized
local character arrays.
Tested on x86_64-linux.
Martin
PR tree-optimization/71625 - missing strlen optimization on different array initialization style
gcc/c/ChangeLog:
PR tree-optimization/71625
* c-parser.c (c_parser_declaration_or_fndef): Call
convert_braced_list_to_string.
gcc/c-family/ChangeLog:
PR tree-optimization/71625
* c-common.c (convert_braced_list_to_string): New function.
* c-common.h (convert_braced_list_to_string): Declare it.
gcc/cp/ChangeLog:
PR tree-optimization/71625
* parser.c (cp_parser_init_declarator): Call
convert_braced_list_to_string.
gcc/testsuite/ChangeLog:
PR tree-optimization/71625
* g++.dg/init/string2.C: New test.
* g++.dg/init/string3.C: New test.
* gcc.dg/strlenopt-55.c: New test.
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 422d668..9a93175 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8345,4 +8345,72 @@ maybe_add_include_fixit (rich_location *richloc, const char *header)
free (text);
}
+/* Attempt to convert a braced array initializer list CTOR into
+ a STRING_CST for convenience and efficiency. When EVAL is non-null,
+ call it on each element to attempt to evaluate constants (used by C++).
+ Fail if the index of a non-nul element is greater than MAXELTS.
+ Return the converted string on success or null on failure. */
+
+tree
+convert_braced_list_to_string (tree ctor, tree (*eval)(tree),
+ unsigned HOST_WIDE_INT maxelts)
+{
+ unsigned HOST_WIDE_INT nelts = CONSTRUCTOR_NELTS (ctor);
+
+ auto_vec<char> str;
+ str.reserve (nelts + 1);
+
+ unsigned HOST_WIDE_INT i;
+ tree index, value;
+
+ FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), i, index, value)
+ {
+ unsigned HOST_WIDE_INT idx = index ? tree_to_uhwi (index) : i;
+
+ /* auto_vec is limited to UINT_MAX elements. */
+ if (idx > UINT_MAX)
+ return NULL_TREE;
+
+ /* Attempt to evaluate constants. */
+ if (eval)
+ value = eval (value);
+
+ /* Avoid non-constant initializers. */
+ if (!tree_fits_uhwi_p (value))
+ return NULL_TREE;
+
+ /* Skip over explicit nuls; any gap they leave is padded below. */
+ unsigned val = tree_to_uhwi (value);
+ if (!val)
+ continue;
+
+ /* Bail if more than 256 elements would be needed to reach IDX from the
+ current end of STR. NOTE(review): this NELTS shadows the outer one. */
+ unsigned nelts = (idx - str.length ()) + 1;
+ if (nelts > 256)
+ return NULL_TREE;
+
+ if (nelts > 1)
+ {
+ str.reserve (idx);
+ str.quick_grow_cleared (idx);
+ }
+
+ if (idx > maxelts)
+ return NULL_TREE;
+
+ str.safe_insert (idx, val);
+ }
+
+ /* Append a nul for the empty initializer { } and for the last
+ explicit initializer in the loop above that is a nul. */
+ if (!nelts || str.length () < i)
+ str.safe_push (0);
+
+ /* Build a string literal but return the embedded STRING_CST. */
+ tree res = build_string_literal (str.length (), str.begin ());
+ res = TREE_OPERAND (TREE_OPERAND (res, 0), 0);
+ return res;
+}
+
#include "gt-c-family-c-common.h"
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index fcec95b..343a1ae 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1331,6 +1331,8 @@ extern void maybe_add_include_fixit (rich_location *, const char *);
extern void maybe_suggest_missing_token_insertion (rich_location *richloc,
enum cpp_ttype token_type,
location_t prev_token_loc);
+extern tree convert_braced_list_to_string (tree, tree (*)(tree) = NULL,
+ unsigned HOST_WIDE_INT = -1);
#if CHECKING_P
namespace selftest {
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 7a92628..e12d270 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -2126,6 +2126,23 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok,
if (d != error_mark_node)
{
maybe_warn_string_init (init_loc, TREE_TYPE (d), init);
+
+ /* Convert a string CONSTRUCTOR into a STRING_CST. */
+ tree valtype = TREE_TYPE (init.value);
+ if (TREE_CODE (init.value) == CONSTRUCTOR
+ && TREE_CODE (valtype) == ARRAY_TYPE)
+ {
+ valtype = TREE_TYPE (valtype);
+ if (TYPE_STRING_FLAG (valtype))
+ if (tree str
+ = convert_braced_list_to_string (init.value))
+ {
+ /* Replace the initializer with the string
+ constant. */
+ init.value = str;
+ }
+ }
+
finish_decl (d, init_loc, init.value,
init.original_type, asm_name);
}
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d44a6b8..c35c2f1 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -19825,6 +19825,32 @@ cp_parser_init_declarator (cp_parser* parser,
finish_lambda_scope ();
if (initializer == error_mark_node)
cp_parser_skip_to_end_of_statement (parser);
+ else if (decl)
+ {
+ tree valtype = TREE_TYPE (decl);
+ if (TREE_CODE (valtype) == ARRAY_TYPE
+ && TYPE_STRING_FLAG (TREE_TYPE (valtype))
+ && TYPE_MAIN_VARIANT (TREE_TYPE (valtype)) == char_type_node)
+ {
+ /* If the array has an explicit bound, use it to
+ constrain the size of the string. */
+ unsigned HOST_WIDE_INT maxelts = HOST_WIDE_INT_M1U;
+ if (tree nelts = DECL_SIZE_UNIT (decl))
+ if (tree_fits_uhwi_p (nelts))
+ maxelts = tree_to_uhwi (nelts);
+
+ /* Convert a string CONSTRUCTOR into a STRING_CST. */
+ if (TREE_CODE (initializer) == CONSTRUCTOR
+ && TREE_TYPE (initializer) == init_list_type_node)
+ {
+ if (tree str
+ = convert_braced_list_to_string (initializer,
+ scalar_constant_value,
+ maxelts))
+ initializer = str;
+ }
+ }
+ }
}
}
diff --git a/gcc/testsuite/g++.dg/init/string2.C b/gcc/testsuite/g++.dg/init/string2.C
new file mode 100644
index 0000000..acb2f5b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/string2.C
@@ -0,0 +1,49 @@
+// PR tree-optimization/71625 - missing strlen optimization on different
+// array initialization style
+//
+// Verify that strlen() calls with constant character array arguments
+// initialized with braced lists of character constants are folded.
+// (This is a small subset of pr63989).
+// { dg-do compile }
+// { dg-options "-O0 -fdump-tree-gimple" }
+
+const char a0[] = { 'a', 'b', 'c', '\0' };
+
+int len0 ()
+{
+ return __builtin_strlen (a0);
+}
+
+const char c = 0;
+const char a1[] = { 'a', 'b', 'c', c };
+
+int len1 ()
+{
+ return __builtin_strlen (a1);
+}
+
+#if 0
+
+// The following aren't handled.
+
+const char &cref = c;
+const char a2[] = { 'a', 'b', 'c', cref };
+
+int len2 ()
+{
+ return __builtin_strlen (a2);
+}
+
+
+const char* const cptr = &cref;
+const char a3[] = { 'a', 'b', 'c', *cptr };
+
+int len3 ()
+{
+ return __builtin_strlen (a3);
+}
+
+#endif
+
+// { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
diff --git a/gcc/testsuite/g++.dg/init/string3.C b/gcc/testsuite/g++.dg/init/string3.C
new file mode 100644
index 0000000..f7c7f2f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/string3.C
@@ -0,0 +1,37 @@
+// PR tree-optimization/71625 - missing strlen optimization on different
+// array initialization style
+//
+// Verify that strlen() calls with constant character array arguments
+// initialized with non-constant elements aren't folded. (This is a small
+// subset of pr63989).
+//
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-optimized" }
+
+
+extern const char c;
+const char a0[] = { 'a', 'b', 'c', c };
+
+int len0 ()
+{
+ return __builtin_strlen (a0);
+}
+
+const char &ref = c;
+const char a1[] = { 'a', 'b', 'c', ref };
+
+int len1 ()
+{
+ return __builtin_strlen (a1);
+}
+
+const char* const ptr = &c;
+const char a2[] = { 'a', 'b', 'c', *ptr };
+
+int len2 ()
+{
+ return __builtin_strlen (a2);
+}
+
+// { dg-final { scan-tree-dump-times "strlen" 3 "optimized" } }
diff --git a/gcc/testsuite/gcc.dg/strlenopt-55.c b/gcc/testsuite/gcc.dg/strlenopt-55.c
new file mode 100644
index 0000000..68c7f1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-55.c
@@ -0,0 +1,83 @@
+/* PR tree-optimization/71625 - missing strlen optimization on different
+ array initialization style
+
+ Verify that strlen() of braced initialized array is folded
+ { dg-do compile }
+ { dg-options "-O0 -Wall -fdump-tree-gimple -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+const char a2_implicit[2] = { };
+const char a3_implicit[3] = { };
+
+const char a3_nul[3] = { 0 };
+const char a5_nul1[3] = { [1] = 0 };
+const char a7_nul2[3] = { [2] = 0 };
+
+const char ax_2_nul[] = { '1', '2', '\0' };
+const char ax_3_nul[] = { '1', '2', '3', '\0' };
+
+const char ax_3_des_nul[] = { [3] = 0, [2] = '3', [1] = '2', [0] = '1' };
+
+const char ax_3[] = { '1', '2', '3' };
+const char a3_3[3] = { '1', '2', '3' };
+
+const char a100_3[] = { '1', '2', '3', [100] = '\0' };
+
+#define CONCAT(x, y) x ## y
+#define CAT(x, y) CONCAT (x, y)
+#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do { \
+ extern void FAILNAME (name) (void); \
+ FAILNAME (name)(); \
+ } while (0)
+
+/* Macro to emit a call to a function named
+ call_in_true_branch_not_eliminated_on_line_NNN()
+ for each call that's expected to be eliminated. The dg-final
+ scan-tree-dump-times directive at the bottom of the test verifies
+ that no such call appears in output. */
+#define ELIM(expr) \
+ if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
+
+#define T(s, n) ELIM (strlen (s) == n)
+
+void test_nulstring (void)
+{
+ T (a2_implicit, 0);
+ T (a3_implicit, 0);
+
+ T (a3_nul, 0);
+ T (a5_nul1, 0);
+ T (a7_nul2, 0);
+
+ T (ax_2_nul, 2);
+ T (ax_3_nul, 3);
+ T (ax_3_des_nul, 3);
+
+ T (a100_3, 3);
+}
+
+/* Verify that excessively large initializers don't run out of
+ memory. */
+
+const char large_string[] = { 'a', [__INT_MAX__] = '\0' };
+
+const int test_large_string (void)
+{
+ return large_string[0] + large_string[__INT_MAX__ - 1];
+}
+
+
+const char very_large_string[] = { 'a', [__LONG_MAX__ / 2] = '\0' };
+
+const int test_very_large_string (void)
+{
+ return very_large_string[0] + very_large_string[__LONG_MAX__ / 2 - 1];
+}
+
+
+/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
+ { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated" 0 "optimized" } } */