[1/4] - Introduce struct strlen_data_t into gimple-fold
This patch introduces a new data structure to reduce the number of arguments and overloads of the get_range_strlen API. One of the goals of this change is to make the API safer to use for optimization (which looks for "permissive" results to account even for some undefined uses) vs warnings (which relies on conservative results to avoid false positives). The patch also adds provides explicit arguments to get_range_strlen and adds descriptive comments to make the affected code easier to follow. Beyond making use of the new data structure the patch makes no observable changes. The changes to gcc/testsuite/gcc.dg/strlenopt-51.c fix a few typos with no functional effect and tweak the helper macro used by the test to make debugging slightly easier.
[1/4] - Introduce struct strlen_data_t into gimple-fold. gcc/ChangeLog: * builtins.c (check_access): Document argumens to a function call. (check_strncat_sizes): Same. (expand_builtin_strncat): Same. * calls.c (maybe_warn_nonstring_arg): Same. * gimple-fold.h (struct strlen_data_t): New type. (get_range_strlen): New overload. * gimple-fold.c (struct strlen_data_t): New type. (get_range_strlen): Change declaration to take strlen_data_t* argument instead of length, flexp, eltsize, and nonstr. Adjust to use strlen_data_t members instead of other arguments. Also set strlen_data_t::maxsize to the same value as maxlen. (extern get_range_strlen): Define new overload. (get_maxval_strlen): Adjust to use strlen_data_t. * gimple-ssa-sprintf.c (get_string_length): Same. gcc/testsuite/ChangeLog: gcc.dg/strlenopt-51.c: Add counter to macros and fix typos. diff --git a/gcc/builtins.c b/gcc/builtins.c index 3f39d10..3e31af4 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -3235,7 +3235,8 @@ check_access (tree exp, tree, tree, tree dstwrite, the upper bound given by MAXREAD add one to it for the terminating nul. Otherwise, set it to one for the same reason, or to MAXREAD as appropriate. */ - get_range_strlen (srcstr, range); + get_range_strlen (srcstr, range, /* eltsize = */ 1, + /* strict = */ false ); if (range[0] && (!maxread || TREE_CODE (maxread) == INTEGER_CST)) { if (maxread && tree_int_cst_le (maxread, range[0])) @@ -4107,7 +4108,7 @@ check_strncat_sizes (tree exp, tree objsize) /* Try to determine the range of lengths that the source expression refers to. */ tree lenrange[2]; - get_range_strlen (src, lenrange); + get_range_strlen (src, lenrange, /* eltsize = */ 1, /* strict = */ false); /* Try to verify that the destination is big enough for the shortest string. */ @@ -4174,12 +4175,13 @@ expand_builtin_strncat (tree exp, rtx) tree slen = c_strlen (src, 1); /* Try to determine the range of lengths that the source expression - refers to. */ + refers to. Since the lengths are only used for warning and not + for code generation disable strict mode below. */ tree lenrange[2]; if (slen) lenrange[0] = lenrange[1] = slen; else - get_range_strlen (src, lenrange); + get_range_strlen (src, lenrange, /* eltsize = */ 1, /* strict = */ false); /* Try to verify that the destination is big enough for the shortest string. First try to determine the size of the destination object diff --git a/gcc/calls.c b/gcc/calls.c index e9660b6..11f00ad 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -1557,7 +1557,10 @@ maybe_warn_nonstring_arg (tree fndecl, tree exp) tree bound = NULL_TREE; /* The range of lengths of a string argument to one of the comparison - functions. If the length is less than the bound it is used instead. */ + functions. If the length is less than the bound it is used instead. + Since the lengths are only used for warning and not for code + generation disable strict mode in the calls to get_range_strlen + below. */ tree lenrng[2] = { NULL_TREE, NULL_TREE }; /* It's safe to call "bounded" string functions with a non-string @@ -1582,7 +1585,8 @@ maybe_warn_nonstring_arg (tree fndecl, tree exp) { tree arg = CALL_EXPR_ARG (exp, argno); g if (!get_attr_nonstring_decl (arg)) - get_range_strlen (arg, lenrng); + get_range_strlen (arg, lenrng, /* eltsize = */ 1, + /* strict = */ false); } } /* Fall through. */ @@ -1603,7 +1607,8 @@ maybe_warn_nonstring_arg (tree fndecl, tree exp) { tree arg = CALL_EXPR_ARG (exp, 0); if (!get_attr_nonstring_decl (arg)) - get_range_strlen (arg, lenrng); + get_range_strlen (arg, lenrng, /* eltsize = */ 1, + /* strict = */ false); if (nargs > 1) bound = CALL_EXPR_ARG (exp, 1); diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index 1e84722..8f71e9c 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -1262,11 +1262,13 @@ gimple_fold_builtin_memset (gimple_stmt_iterator *gsi, tree c, tree len) } -/* Obtain the minimum and maximum string length or minimum and maximum - value of ARG in LENGTH[0] and LENGTH[1], respectively. +/* Try to obtain the range of the lengths of the string(s) referenced + by ARG, or the size of the largest array ARG referes to if the range + of lengths cannot be determined, and store all in *PDATA. If ARG is an SSA name variable, follow its use-def chains. When - TYPE == 0, if LENGTH[1] is not equal to the length we determine or - if we are unable to determine the length or value, return false. + TYPE == 0, then if PDATA->MAXLEN is not equal to the determined + length or if we are unable to determine the length or value, return + false. VISITED is a bitmap of visited variables. TYPE is 0 if string length should be obtained, 1 for maximum string length and 2 for maximum value ARG can have. @@ -1276,25 +1278,21 @@ gimple_fold_builtin_memset (gimple_stmt_iterator *gsi, tree c, tree len) PHIs and COND_EXPRs optimistically, if we can determine string length minimum and maximum, it will use the minimum from the ones where it can be determined. - Set *FLEXP to true if the range of the string lengths has been + Set PDATA->FLEXP to true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound due to it being used as a poor-man's flexible array member. - Pass NONSTR through to children. - ELTSIZE is 1 for normal single byte character strings, and 2 or - 4 for wide characer strings. ELTSIZE is by default 1. */ + Set PDATA->NONSTR if ARG refers to an unterminated constant array. + On input, set PDATA->ELTSIZE to 1 for normal single byte character + strings, and 2 or 4 for wide characer strings. */ static bool -get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, - int fuzzy, bool *flexp, unsigned eltsize, tree *nonstr) +get_range_strlen (tree arg, bitmap *visited, int type, int fuzzy, + strlen_data_t *pdata) { tree var, val = NULL_TREE; gimple *def_stmt; - /* The minimum and maximum length. */ - tree *const minlen = length; - tree *const maxlen = length + 1; - if (TREE_CODE (arg) != SSA_NAME) { /* We can end up with &(*iftmp_1)[0] here as well, so handle it. */ @@ -1307,9 +1305,8 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, tree aop0 = TREE_OPERAND (op, 0); if (TREE_CODE (aop0) == INDIRECT_REF && TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME) - return get_range_strlen (TREE_OPERAND (aop0, 0), length, - visited, type, fuzzy, flexp, - eltsize, nonstr); + return get_range_strlen (TREE_OPERAND (aop0, 0), visited, type, + fuzzy, pdata); } else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy) { @@ -1337,14 +1334,13 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, return false; } else - val = c_strlen (arg, 1, nonstr, eltsize); + val = c_strlen (arg, 1, &pdata->nonstr, pdata->eltsize); if (!val && fuzzy) { if (TREE_CODE (arg) == ADDR_EXPR) - return get_range_strlen (TREE_OPERAND (arg, 0), length, - visited, type, fuzzy, flexp, - eltsize, nonstr); + return get_range_strlen (TREE_OPERAND (arg, 0), visited, type, + fuzzy, pdata); if (TREE_CODE (arg) == ARRAY_REF) { @@ -1369,12 +1365,12 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, integer_one_node); /* Set the minimum size to zero since the string in the array could have zero length. */ - *minlen = ssize_int (0); + pdata->minlen = ssize_int (0); if (TREE_CODE (TREE_OPERAND (arg, 0)) == COMPONENT_REF && type == TREE_TYPE (TREE_OPERAND (arg, 0)) && array_at_struct_end_p (TREE_OPERAND (arg, 0))) - *flexp = true; + pdata->flexarray = true; } else if (TREE_CODE (arg) == COMPONENT_REF && (TREE_CODE (TREE_TYPE (TREE_OPERAND (arg, 1))) @@ -1389,7 +1385,7 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, Set *FLEXP to true if the array whose bound is being used is at the end of a struct. */ if (array_at_struct_end_p (arg)) - *flexp = true; + pdata->flexarray = true; arg = TREE_OPERAND (arg, 1); @@ -1407,7 +1403,7 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, integer_one_node); /* Set the minimum size to zero since the string in the array could have zero length. */ - *minlen = ssize_int (0); + pdata->minlen = ssize_int (0); } if (VAR_P (arg)) @@ -1427,7 +1423,7 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, wi::sub (wi::to_wide (val), 1)); /* Set the minimum size to zero since the string in the array could have zero length. */ - *minlen = ssize_int (0); + pdata->minlen = ssize_int (0); } } } @@ -1435,30 +1431,51 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, if (!val) return false; - if (!*minlen + /* Adjust the lower bound on the string length as necessary. */ + if (!pdata->minlen || (type > 0 - && TREE_CODE (*minlen) == INTEGER_CST + && TREE_CODE (pdata->minlen) == INTEGER_CST && TREE_CODE (val) == INTEGER_CST - && tree_int_cst_lt (val, *minlen))) - *minlen = val; + && tree_int_cst_lt (val, pdata->minlen))) + pdata->minlen = val; + + if (pdata->maxsize) + { + /* Adjust the tighter (more optimistic) bound if necessary + and proceed to adjust the more conservative bound. */ + if (TREE_CODE (val) == INTEGER_CST) + { + if (TREE_CODE (pdata->maxsize) == INTEGER_CST + && tree_int_cst_lt (pdata->maxsize, val)) + pdata->maxsize = val; + else + pdata->maxsize = build_all_ones_cst (size_type_node); + } + else + pdata->maxsize = val; + } + else + pdata->maxsize = val; - if (*maxlen) + if (pdata->maxlen) { + /* Adjust the more conservative bound if possible/necessary + and fail otherwise. */ if (type > 0) { - if (TREE_CODE (*maxlen) != INTEGER_CST + if (TREE_CODE (pdata->maxlen) != INTEGER_CST || TREE_CODE (val) != INTEGER_CST) return false; - if (tree_int_cst_lt (*maxlen, val)) - *maxlen = val; + if (tree_int_cst_lt (pdata->maxlen, val)) + pdata->maxlen = val; return true; } - else if (simple_cst_equal (val, *maxlen) != 1) + else if (simple_cst_equal (val, pdata->maxlen) != 1) return false; } - *maxlen = val; + pdata->maxlen = val; return true; } @@ -1486,8 +1503,7 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, || gimple_assign_unary_nop_p (def_stmt)) { tree rhs = gimple_assign_rhs1 (def_stmt); - return get_range_strlen (rhs, length, visited, type, fuzzy, flexp, - eltsize, nonstr); + return get_range_strlen (rhs, visited, type, fuzzy, pdata); } else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR) { @@ -1495,13 +1511,12 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, gimple_assign_rhs3 (def_stmt) }; for (unsigned int i = 0; i < 2; i++) - if (!get_range_strlen (ops[i], length, visited, type, fuzzy, - flexp, eltsize, nonstr)) + if (!get_range_strlen (ops[i], visited, type, fuzzy, pdata)) { - if (fuzzy == 2) - *maxlen = build_all_ones_cst (size_type_node); - else + if (fuzzy != 2) return false; + pdata->maxsize = build_all_ones_cst (size_type_node); + pdata->maxlen = pdata->maxsize; } return true; } @@ -1523,13 +1538,12 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, if (arg == gimple_phi_result (def_stmt)) continue; - if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp, - eltsize, nonstr)) + if (!get_range_strlen (arg, visited, type, fuzzy, pdata)) { - if (fuzzy == 2) - *maxlen = build_all_ones_cst (size_type_node); - else + if (fuzzy != 2) return false; + pdata->maxsize = build_all_ones_cst (size_type_node); + pdata->maxlen = pdata->maxsize; } } return true; @@ -1539,6 +1553,34 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, } } +/* Try to obtain the range of the lengths of the string(s) referenced + by ARG, or the size of the largest array ARG refers to if the range + of lengths cannot be determined, and store all in *PDATA. + If STRICT is true, handle PHIs and COND_EXPRs conservatively; when + it's false to handle PHIs and COND_EXPRs optimistically by storing + the minimum and maximum of only those strings whose length can be + determined, and ignoring others. + STRICT of false should be only used for warning code. */ + +bool +get_range_strlen (tree arg, bool strict, strlen_data_t *pdata) +{ + bitmap visited = NULL; + + if (!get_range_strlen (arg, &visited, /* type = */ 1, + /* fuzzy = */ strict ? 1 : 2, pdata)) + { + pdata->minlen = NULL_TREE; + pdata->maxsize = NULL_TREE; + pdata->maxlen = NULL_TREE; + } + + if (visited) + BITMAP_FREE (visited); + + return pdata->flexarray; +} + /* Determine the minimum and maximum value or string length that ARG refers to and store each in the first two elements of MINMAXLEN. For expressions that point to strings of unknown lengths that are @@ -1568,49 +1610,35 @@ bool get_range_strlen (tree arg, tree minmaxlen[2], unsigned eltsize, bool strict, tree *nonstr /* = NULL */) { - bitmap visited = NULL; + strlen_data_t data (eltsize); - minmaxlen[0] = NULL_TREE; - minmaxlen[1] = NULL_TREE; + bool ret = get_range_strlen (arg, strict, &data); + minmaxlen[0] = data.minlen; + minmaxlen[1] = data.maxlen; - tree nonstrbuf; - if (!nonstr) - nonstr = &nonstrbuf; - *nonstr = NULL_TREE; - - bool flexarray = false; - if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 1 : 2, - &flexarray, eltsize, nonstr)) - { - minmaxlen[0] = NULL_TREE; - minmaxlen[1] = NULL_TREE; - } - - if (visited) - BITMAP_FREE (visited); + if (nonstr) + *nonstr = data.nonstr; - return flexarray; + return ret; } /* Return the maximum string length for ARG, counting by TYPE (1, 2 or 4 for normal or wide chars). NONSTR indicates if the caller is prepared to handle unterminated strings. - If an unterminated string is discovered and our caller handles - unterminated strings, then bubble up the offending DECL and + If an unterminated array is discovered and our caller handles + unterminated arrays, then bubble up the offending DECL and return the maximum size. Otherwise return NULL. */ tree get_maxval_strlen (tree arg, int type, tree *nonstr /* = NULL */) { bitmap visited = NULL; - tree len[2] = { NULL_TREE, NULL_TREE }; - bool dummy; - /* Set to non-null if ARG refers to an untermianted array. */ - tree mynonstr = NULL_TREE; - if (!get_range_strlen (arg, len, &visited, type, 0, &dummy, 1, &mynonstr)) - len[1] = NULL_TREE; + strlen_data_t data (1); + + if (!get_range_strlen (arg, &visited, type, /* fuzzy = */ 0, &data)) + data.maxlen = NULL_TREE; if (visited) BITMAP_FREE (visited); @@ -1619,12 +1647,12 @@ get_maxval_strlen (tree arg, int type, tree *nonstr /* = NULL */) /* For callers prepared to handle unterminated arrays set *NONSTR to point to the declaration of the array and return the maximum length/size. */ - *nonstr = mynonstr; - return len[1]; + *nonstr = data.nonstr; + return data.maxlen; } /* Fail if the constant array isn't nul-terminated. */ - return mynonstr ? NULL_TREE : len[1]; + return data.nonstr ? NULL_TREE : data.maxlen; } diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h index 26e2727..0d523e7 100644 --- a/gcc/gimple-fold.h +++ b/gcc/gimple-fold.h @@ -25,6 +25,53 @@ along with GCC; see the file COPYING3. If not see extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL); extern tree canonicalize_constructor_val (tree, tree); extern tree get_symbol_constant_value (tree); + +struct strlen_data_t +{ + /* [MIN, MAXSIZE, MAXLEN] is a range describing the length of + a string of possibly unknown length. For a string of known + length the range is a constant where MIN == MAXSIZE == MAXLEN + holds. + For other strings, MIN is the length of the shortest string that + can be stored in the referenced object, i.e., MIN == 0. MAXSIZE + is the size of the largest array referenced by the expression. + MAXLEN is the length of the longest sequence of non-zero bytes + in memory reference by the expression. For such strings, + MIN <= MAXSIZE <= MAXLEN holds. For example, given: + struct A { char a[7], b[9]; }; + extern struct A *p; + n = strlen (p->a); + the computed range will be [0, 6, MAXOBJSIZE]. + As the tighter (and more optimistic) bound, MAXSIZE is suitable + for diagnostics but not for optimization. + As the more conservative bound, MAXLEN is intended to be used + for optimization. */ + tree minlen; + tree maxlen; + tree maxsize; + /* When non-null, NONSTR refers to the declaration known to store + an unterminated constant character array, as in: + const char s[] = { 'a', 'b', 'c' }; + It is used to diagnose uses of such arrays in functions such as + strlen() that expect a nul-terminated string as an argument. */ + tree nonstr; + /* ELTSIZE is set to 1 for single byte character strings, and 2 or 4 + for wide characer strings. */ + unsigned eltsize; + /* FLEXARRAY is true if the range of the string lengths has been + obtained from the upper bound of an array at the end of a struct. + Such an array may hold a string that's longer than its upper bound + due to it being used as a poor-man's flexible array member. */ + bool flexarray; + + /* Set ELTSIZE and value-initialize all other members. */ + strlen_data_t (unsigned eltbytes) + : minlen (), maxlen (), maxsize (), nonstr (), eltsize (eltbytes), + flexarray () { } +}; + +extern bool get_range_strlen (tree, strlen_data_t * = NULL); + extern bool get_range_strlen (tree, tree[2], unsigned = 1, bool = false, tree * = NULL); extern tree get_maxval_strlen (tree, int, tree * = NULL); diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c index 9b6f6e6..0ce016b 100644 --- a/gcc/gimple-ssa-sprintf.c +++ b/gcc/gimple-ssa-sprintf.c @@ -2007,7 +2007,8 @@ get_string_length (tree str, unsigned eltsize) aren't known to point any such arrays result in LENRANGE[1] set to SIZE_MAX. */ tree lenrange[2]; - bool flexarray = get_range_strlen (str, lenrange, eltsize); + bool flexarray = get_range_strlen (str, lenrange, eltsize, + /* strict = */ false); if (lenrange [0] || lenrange [1]) { diff --git a/gcc/testsuite/gcc.dg/strlenopt-51.c b/gcc/testsuite/gcc.dg/strlenopt-51.c index cbed11b..9ec718f 100644 --- a/gcc/testsuite/gcc.dg/strlenopt-51.c +++ b/gcc/testsuite/gcc.dg/strlenopt-51.c @@ -6,11 +6,12 @@ #define CONCAT(x, y) x ## y #define CAT(x, y) CONCAT (x, y) -#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__) +#define FAILNAME(name, counter) \ + CAT (CAT (CAT (call_ ## name ##_on_line_, __LINE__), _), counter) -#define FAIL(name) do { \ - extern void FAILNAME (name) (void); \ - FAILNAME (name)(); \ +#define FAIL(name, counter) do { \ + extern void FAILNAME (name, counter) (void); \ + FAILNAME (name, counter)(); \ } while (0) /* Macro to emit a call to funcation named @@ -19,7 +20,7 @@ scan-tree-dump-time directive at the bottom of the test verifies that no such call appears in output. */ #define ELIM(expr) \ - if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0 + if (!(expr)) FAIL (in_true_branch_not_eliminated, __COUNTER__); else (void)0 /* Macro to emit a call to a function named call_made_in_{true,false}_branch_on_line_NNN() @@ -27,11 +28,13 @@ scan-tree-dump-time directive at the bottom of the test verifies that the expected number of both kinds of calls appears in output (a pair for each line with the invocation of the KEEP() macro. */ -#define KEEP(expr) \ +#define DO_KEEP(expr, counter) \ if (expr) \ - FAIL (made_in_true_branch); \ + FAIL (made_in_true_branch, counter); \ else \ - FAIL (made_in_false_branch) + FAIL (made_in_false_branch, counter) + +#define KEEP(expr) DO_KEEP (expr, __COUNTER__) #define T(s, n) ELIM (strlen (s) == n) @@ -75,7 +78,7 @@ const char a9_9[][9][9] = { { S5, S6, S7, S8, S0, S1, S2, S3, S4 }, { S6, S7, S8, S0, S1, S2, S3, S4, S5 }, { S7, S8, S0, S1, S2, S3, S4, S5, S6 }, - { S8, S0, S2, S2, S3, S4, S5, S6, S7 } + { S8, S0, S1, S2, S3, S4, S5, S6, S7 } }; void test_elim_a9_9 (int i) @@ -101,7 +104,7 @@ void test_keep_a9_9 (int i) { #undef T #define T(I) \ - KEEP (strlen (&a9_9[i][I][0]) > (1 + I) % 9); \ + KEEP (strlen (&a9_9[i][I][0]) > (0 + I) % 9); \ KEEP (strlen (&a9_9[i][I][1]) > (1 + I) % 9); \ KEEP (strlen (&a9_9[i][I][2]) > (2 + I) % 9); \ KEEP (strlen (&a9_9[i][I][3]) > (3 + I) % 9); \