A failure in a test for the recently enhanced -Warray-bounds warning exposed an unnecessarily broad restriction in the strlen pass that prevents it from tracking the length of a member string of locally defined and initialized struct:
void f (void) { struct { char s[8]; int i } a = { "1234", 5 }; if (strlen (a.s) != 4) // not folded abort (); } IIUC, the restriction was in place to account for writes into an array changing or invalidating the length of a string stored in its initial elements. This would happen if the write either changed the string's terminating nul byte, or if it reset one of the prior non-nul bytes. To reflect just this intent the restriction can be tightened up to improve the pass' ability to track even the lengths of string members of locally initialized aggregates. Besides leading to better code this change also clears up the test failure. Tested on x86_64-linux. Martin
PR tree-optimization/83821 - local aggregate initialization defeats strlen optimization gcc/ChangeLog: PR tree-optimization/83821 * tree-ssa-strlen.c (maybe_invalidate): Consider the length of a string when available. (handle_char_store): Reset calloc statement on a non-nul store. gcc/testsuite/ChangeLog: PR tree-optimization/83821 * c-c++-common/Warray-bounds-4.c: Remove XFAIL. * gcc.dg/strlenopt-43.c: New test. * gcc.dg/strlenopt-44.c: Same. * gcc.dg/tree-ssa/calloc-4.c: Same. Index: gcc/testsuite/gcc.dg/strlenopt-43.c =================================================================== --- gcc/testsuite/gcc.dg/strlenopt-43.c (nonexistent) +++ gcc/testsuite/gcc.dg/strlenopt-43.c (working copy) @@ -0,0 +1,178 @@ +/* PR tree-optimization/83821 - local aggregate initialization defeats + strlen optimization + { dg-do compile } + { dg-options "-O2 -Wall -fdump-tree-optimized" } */ + +#include "strlenopt.h" + +#define CAT(x, y) x ## y +#define CONCAT(x, y) CAT (x, y) +#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__) + +#define FAIL(name) do { \ + extern void FAILNAME (name) (void); \ + FAILNAME (name)(); \ + } while (0) + +/* Macro to emit a call to funcation named + call_in_true_branch_not_eliminated_on_line_NNN() + for each call that's expected to be eliminated. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that no such call appears in output. */ +#define ELIM(expr) \ + if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0 + +/* Macro to emit a call to a function named + call_made_in_{true,false}_branch_on_line_NNN() + for each call that's expected to be retained. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that the expected number of both kinds of calls appears in output + (a pair for each line with the invocation of the KEEP() macro. */ +#define KEEP(expr) \ + if (expr) \ + FAIL (made_in_true_branch); \ + else \ + FAIL (made_in_false_branch) + +#define STR10 "0123456789" +#define STR20 STR10 STR10 +#define STR30 STR20 STR10 +#define STR40 STR20 STR20 + +struct Consec +{ + char s1[sizeof STR40]; + char s2[sizeof STR40]; + const char *p1; + const char *p2; +}; + +void elim_init_consecutive (void) +{ + struct Consec a = { STR10, STR10, STR10, STR10 }; + + ELIM (strlen (a.s1) == sizeof STR10 - 1); + ELIM (strlen (a.s2) == sizeof STR10 - 1); + ELIM (strlen (a.p1) == sizeof STR10 - 1); + ELIM (strlen (a.p2) == sizeof STR10 - 1); +} + + +void elim_array_init_consecutive (void) +{ + struct Consec a[2] = { + { STR10, STR20, STR30, STR40 }, + { STR40, STR30, STR20, STR10 } + }; + + ELIM (strlen (a[0].s1) == sizeof STR10 - 1); + ELIM (strlen (a[0].s2) == sizeof STR20 - 1); + ELIM (strlen (a[0].p1) == sizeof STR30 - 1); + ELIM (strlen (a[0].p2) == sizeof STR40 - 1); + + ELIM (strlen (a[1].s1) == sizeof STR40 - 1); + ELIM (strlen (a[1].s2) == sizeof STR30 - 1); + ELIM (strlen (a[1].p1) == sizeof STR20 - 1); + ELIM (strlen (a[1].p2) == sizeof STR10 - 1); +} + + +struct NonConsec +{ + char s1[sizeof STR40]; + int i1; + char s2[sizeof STR40]; + int i2; + const char *p1; + int i3; + const char *p2; + int i4; +}; + +void elim_init_nonconsecutive (void) +{ + struct NonConsec b = { STR10, 123, STR20, 456, b.s1, 789, b.s2, 123 }; + + ELIM (strlen (b.s1) == sizeof STR10 - 1); + ELIM (strlen (b.s2) == sizeof STR20 - 1); + ELIM (strlen (b.p1) == sizeof STR10 - 1); + ELIM (strlen (b.p2) == sizeof STR20 - 1); +} + +void elim_assign_tmp_nonconsecutive (void) +{ + struct NonConsec b = { "a", 1, "b", 2, "c", 3, "d", 4 }; + + b = (struct NonConsec){ STR10, 123, STR20, 456, STR30, 789, STR40, 123 }; + + ELIM (strlen (b.s1) == sizeof STR10 - 1); + ELIM (strlen (b.s2) == sizeof STR20 - 1); + ELIM (strlen (b.p1) == sizeof STR30 - 1); + ELIM (strlen (b.p2) == sizeof STR40 - 1); +} + +const struct NonConsec bcst = { + STR40, -1, STR30, -2, STR20, -3, STR10, -4 +}; + +void elim_assign_cst_nonconsecutive (void) +{ + struct NonConsec b = { "a", 1, "b", 2, "c", 3, "d" }; + + b = bcst; + + ELIM (strlen (b.s1) == sizeof STR40 - 1); + ELIM (strlen (b.s2) == sizeof STR30 - 1); + ELIM (strlen (b.p1) == sizeof STR20 - 1); + ELIM (strlen (b.p2) == sizeof STR10 - 1); +} + +void elim_copy_cst_nonconsecutive (void) +{ + struct NonConsec b = { "a", 1, "b", 2, "c", 3, "d" }; + memcpy (&b, &bcst, sizeof b); + + /* ELIM (strlen (b.s1) == sizeof STR40 - 1); + ELIM (strlen (b.s2) == sizeof STR30 - 1); */ + ELIM (strlen (b.p1) == sizeof STR20 - 1); + ELIM (strlen (b.p2) == sizeof STR10 - 1); +} + + +#line 1000 + +int sink (void*); + +void keep_init_nonconsecutive (void) +{ + struct NonConsec b = { + STR10, 123, STR20, 456, b.s1, 789, b.s2, + sink (&b) + }; + + KEEP (strlen (b.s1) == sizeof STR10 - 1); + KEEP (strlen (b.s2) == sizeof STR10 - 1); + KEEP (strlen (b.p1) == sizeof STR10 - 1); + KEEP (strlen (b.p2) == sizeof STR10 - 1); +} + +void keep_assign_tmp_nonconsecutive (void) +{ + struct NonConsec b = { "a", 1, "b", 2, "c", 3, "d", 4 }; + + b = (struct NonConsec){ + STR10, 123, STR20, 456, STR30, 789, STR40, + sink (&b) + }; + + KEEP (strlen (b.s1) == sizeof STR10 - 1); + KEEP (strlen (b.s2) == sizeof STR20 - 1); + KEEP (strlen (b.p1) == sizeof STR30 - 1); + KEEP (strlen (b.p2) == sizeof STR40 - 1); +} + +/* { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated_" 0 "optimized" } } + + { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 8 "optimized" } } + { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 8 "optimized" } } */ + Index: gcc/testsuite/gcc.dg/strlenopt-44.c =================================================================== --- gcc/testsuite/gcc.dg/strlenopt-44.c (nonexistent) +++ gcc/testsuite/gcc.dg/strlenopt-44.c (working copy) @@ -0,0 +1,41 @@ +/* PR tree-optimization/83821 - local aggregate initialization defeats + strlen optimization + Verify that a strlen() call is eliminated for a pointer to a region + of memory allocated by calloc() even if one or more nul bytes are + written into it. + { dg-do compile } + { dg-options "-O2 -fdump-tree-optimized" } */ + +unsigned n; + +void* elim_strlen_calloc_store_memset_1 (unsigned a, unsigned b) +{ + char *p = __builtin_calloc (a, 1); + + p[1] = '\0'; + + __builtin_memset (p, 0, b); + + n = __builtin_strlen (p); + + return p; +} + +void* elim_strlen_calloc_store_memset_2 (unsigned a, unsigned b, unsigned c) +{ + char *p = __builtin_calloc (a, 1); + + p[1] = '\0'; + __builtin_memset (p, 0, b); + + n = __builtin_strlen (p); + + p[3] = 0; + __builtin_memset (p, 0, c); + + n = __builtin_strlen (p); + + return p; +} + +/* { dg-final { scan-tree-dump-not "strlen" "optimized" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/calloc-4.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/calloc-4.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/calloc-4.c (working copy) @@ -0,0 +1,39 @@ +/* PR tree-optimization/83821 - local aggregate initialization defeats + strlen optimization + Verify that a memset() call to zero out a subregion of memory + allocated by calloc() is eliminated even if a zero byte is written + into it in between the two calls. See the calloc-2.c test that + verifies that the memset() calls isn't eliminated if the written + value is non-zero. + { dg-do compile } + { dg-options "-O2 -fdump-tree-optimized" } */ + +void* elim_calloc_store_memset_1 (unsigned a, unsigned b) +{ + char *p = __builtin_calloc (a, 1); + + p[1] = '\0'; + + __builtin_memset (p, 0, b); + + n = __builtin_strlen (p); + + return p; +} + +void* elim_calloc_store_memset_2 (unsigned a, unsigned b, unsigned c) +{ + char *p = __builtin_calloc (a, 1); + + p[1] = '\0'; + __builtin_memset (p, 0, b); + + p[3] = 0; + __builtin_memset (p, 0, c); + + return p; +} + +/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } + { dg-final { scan-tree-dump-times "calloc" 2 "optimized" } } + { dg-final { scan-tree-dump-not "memset" "optimized" } } */ Index: gcc/tree-ssa-strlen.c =================================================================== --- gcc/tree-ssa-strlen.c (revision 256590) +++ gcc/tree-ssa-strlen.c (working copy) @@ -693,8 +693,16 @@ maybe_invalidate (gimple *stmt) if (!si->dont_invalidate) { ao_ref r; - /* Do not use si->nonzero_chars. */ - ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE); + tree size = NULL_TREE; + if (si->nonzero_chars) + { + /* Include the terminating nul in the size of the string + to consider when determining possible clobber. */ + tree type = TREE_TYPE (si->nonzero_chars); + size = fold_build2 (PLUS_EXPR, type, si->nonzero_chars, + build_int_cst (type, 1)); + } + ao_ref_init_from_ptr_and_size (&r, si->ptr, size); if (stmt_may_clobber_ref_p_1 (stmt, &r)) { set_strinfo (i, NULL); @@ -2839,8 +2847,23 @@ handle_char_store (gimple_stmt_iterator *gsi) si = get_strinfo (idx); if (offset == 0) ssaname = TREE_OPERAND (lhs, 0); - else if (si == NULL || compare_nonzero_chars (si, offset) < 0) - return true; + else if (si == NULL) + return true; + else if (compare_nonzero_chars (si, offset) < 0) + { + if (si->stmt && !initializer_zerop (rhs)) + { + /* Reset the calloc statement that created the string + if a non-zero value is being written into it, even + if it's past the leading nul byte, so that subsequent + memset calls aren't eliminated that may clear that + byte. */ + tree fn = gimple_call_fndecl (si->stmt); + if (DECL_FUNCTION_CODE (fn) == BUILT_IN_CALLOC) + si->stmt = NULL; + } + return true; + } } } else