In the long resolved pr57230 I came across a discussion of an enhancement to the strlen pass to also handle non-const char assignments into the middle of known strings, in addition to assignments where the stored value is known. Storing non-nul into the middle of a string of known length means that its length can be assumed to stay the same, exposing more strlen optimization opportunities.
The attached patch implements this idea. (I looked for a simple function that returns true/false based on whether a SSA_NAME is or isn't definitely non-zero but couldn't find one so I created one though it seems that extending one of the existing functions might be a better approach?) In a GCC build the patch discovers 40 non-nul stores out of 70 instances where range info is available (all in libstdc++), so it gives some, albeit very small, improvement. Martin
PR tree-optimization/86083 - handle non-constant assignments in strlen gcc/ChangeLog: PR tree-optimization/86083 * tree-ssa-strlen.c (value_is_nonzero): New static function. (handle_char_store): Call it. gcc/testsuite/ChangeLog: PR tree-optimization/86083 * gcc.dg/strlenopt-44.c: New test. Index: gcc/tree-ssa-strlen.c =================================================================== --- gcc/tree-ssa-strlen.c (revision 261284) +++ gcc/tree-ssa-strlen.c (working copy) @@ -3126,6 +3126,27 @@ get_string_cst_length (tree rhs) return -1; } +/* Return true iff EXP has a non-zero value, false otherwise. */ + +static bool +value_is_nonzero (tree exp) +{ + if (TREE_CODE (exp) != SSA_NAME) + return integer_nonzerop (exp); + + wide_int min, max; + value_range_type rng = get_range_info (exp, &min, &max); + + if (rng == VR_RANGE) + return wi::gts_p (min, wi::zero (min.get_precision ())); + + if (rng == VR_ANTI_RANGE) + return (wi::les_p (min, wi::zero (min.get_precision ())) + && wi::les_p (wi::zero (min.get_precision ()), max)); + + return false; +} + /* Handle a single character store. */ static bool @@ -3165,9 +3186,7 @@ handle_char_store (gimple_stmt_iterator *gsi) } bool storing_zero_p = initializer_zerop (rhs); - bool storing_nonzero_p = (!storing_zero_p - && TREE_CODE (rhs) == INTEGER_CST - && integer_nonzerop (rhs)); + bool storing_nonzero_p = !storing_zero_p && value_is_nonzero (rhs); /* Set to the length of the string being assigned if known. */ HOST_WIDE_INT rhslen; Index: gcc/testsuite/gcc.dg/strlenopt-44.c =================================================================== --- gcc/testsuite/gcc.dg/strlenopt-44.c (nonexistent) +++ gcc/testsuite/gcc.dg/strlenopt-44.c (working copy) @@ -0,0 +1,79 @@ +/* PR tree-optimization/86083 - handle non-constant assignments in strlen + { dg-do compile } + { dg-options "-O2 -Wall -fdump-tree-optimized" } */ + +#include "range.h" +#include "strlenopt.h" + +#define CAT(x, y) x ## y +#define CONCAT(x, y) CAT (x, y) +#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__) + +#define FAIL(name) do { \ + extern void FAILNAME (name) (void); \ + FAILNAME (name)(); \ + } while (0) + +/* Macro to emit a call to funcation named + call_in_true_branch_not_eliminated_on_line_NNN() + for each call that's expected to be eliminated. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that no such call appears in output. */ +#define ASSERT_ELIM(expr) \ + if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0 + +/* Macro to emit a call to a function named + call_made_in_{true,false}_branch_on_line_NNN() + for each call that's expected to be retained. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that the expected number of both kinds of calls appears in output + (a pair for each line with the invocation of the KEEP() macro. */ +#define ASSERT_KEEP(expr) \ + if (expr) \ + FAIL (made_in_true_branch); \ + else \ + FAIL (made_in_false_branch) + + +#define ELIM(init, i, c, res) \ + do { \ + char a[] = init; \ + a[i] = c; \ + ASSERT_ELIM (strlen (a) == res); \ + } while (0) + +#define KEEP(init, i, c, res) \ + do { \ + char a[] = init; \ + a[i] = c; \ + ASSERT_KEEP (strlen (a) == res); \ + } while (0) + +void test_elim (void) +{ + ELIM ("1", 0, UR (1, 2), 1); + ELIM ("1", 0, UR (1, 127), 1); + ELIM ("1", 0, UR ('0', '9'), 1); + + ELIM ("12", 0, UR (1, 127), 2); + ELIM ("12", 1, UR (1, 127), 2); + + ELIM ("123", 0, UR (1, 9), 3); + ELIM ("123", 1, UR (10, 99), 3); + ELIM ("123", 2, UR (100, 127), 3); +} + +#line 1000 + +void test_keep (void) +{ + size_t uchar_max = (unsigned char)-1; + + KEEP ("1", 0, UR (1, uchar_max + 1), 1); + KEEP ("1\0\3", 1, UR (1, 2), 1); +} + +/* { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated_" 0 "optimized" } } + + { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 2 "optimized" } } + { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 2 "optimized" } } */