Adding tests for unsafe uses of unterminated constant char arrays in string functions exposed the limitation in strncmp folding described in PR 92501: GCC only folds strncmp calls involving nul-terminated constant strings.
The attached patch improves the folder to also handle unterminated constant character arrays. This capability is in turn relied on for the dependable detection of unsafe uses of unterminated arrays in strncpy, a patch for which I'm about to post separately. Tested on x86_64-linux. Martin
PR tree-optimization/92501 - strncmp with constant unterminated arrays not folded gcc/testsuite/ChangeLog: PR tree-optimization/92501 * gcc.dg/strcmpopt_7.c: New test. gcc/ChangeLog: PR tree-optimization/92501 * gimple-fold.c ((gimple_fold_builtin_string_compare): Let strncmp handle unterminated arrays. Rename local variables for clarity. Index: gcc/gimple-fold.c =================================================================== --- gcc/gimple-fold.c (revision 278253) +++ gcc/gimple-fold.c (working copy) @@ -2323,8 +2323,7 @@ gimple_load_first_char (location_t loc, tree str, return var; } -/* Fold a call to the str{n}{case}cmp builtin pointed by GSI iterator. - FCODE is the name of the builtin. */ +/* Fold a call to the str{n}{case}cmp builtin pointed by GSI iterator. */ static bool gimple_fold_builtin_string_compare (gimple_stmt_iterator *gsi) @@ -2337,18 +2336,19 @@ gimple_fold_builtin_string_compare (gimple_stmt_it tree str1 = gimple_call_arg (stmt, 0); tree str2 = gimple_call_arg (stmt, 1); tree lhs = gimple_call_lhs (stmt); - HOST_WIDE_INT length = -1; + tree len = NULL_TREE; + HOST_WIDE_INT bound = -1; /* Handle strncmp and strncasecmp functions. */ if (gimple_call_num_args (stmt) == 3) { - tree len = gimple_call_arg (stmt, 2); + len = gimple_call_arg (stmt, 2); if (tree_fits_uhwi_p (len)) - length = tree_to_uhwi (len); + bound = tree_to_uhwi (len); } /* If the LEN parameter is zero, return zero. */ - if (length == 0) + if (bound == 0) { replace_call_with_value (gsi, integer_zero_node); return true; @@ -2361,9 +2361,32 @@ gimple_fold_builtin_string_compare (gimple_stmt_it return true; } - const char *p1 = c_getstr (str1); - const char *p2 = c_getstr (str2); + /* Initially set to the number of characters, including the terminating + nul if each array has one. Nx == strnlen(Sx, Nx) implies that + the array is not terminated by a nul. + For nul-terminated strings then adjusted to their length. */ + unsigned HOST_WIDE_INT len1 = HOST_WIDE_INT_MAX, len2 = len1; + const char *p1 = c_getstr (str1, &len1); + const char *p2 = c_getstr (str2, &len2); + /* The position of the terminting nul character if one exists, otherwise + a value greater than LENx. */ + unsigned HOST_WIDE_INT nulpos1 = HOST_WIDE_INT_MAX, nulpos2 = nulpos1; + + if (p1) + { + nulpos1 = strnlen (p1, len1); + if (nulpos1 < len1) + len1 = nulpos1; + } + + if (p2) + { + nulpos2 = strnlen (p2, len2); + if (nulpos2 < len2) + len2 = nulpos2; + } + /* For known strings, return an immediate value. */ if (p1 && p2) { @@ -2374,17 +2397,19 @@ gimple_fold_builtin_string_compare (gimple_stmt_it { case BUILT_IN_STRCMP: case BUILT_IN_STRCMP_EQ: - { - r = strcmp (p1, p2); - known_result = true; + if (len1 != nulpos1 || len2 != nulpos2) break; - } + + r = strcmp (p1, p2); + known_result = true; + break; + case BUILT_IN_STRNCMP: case BUILT_IN_STRNCMP_EQ: { - if (length == -1) + if (bound == -1) break; - r = strncmp (p1, p2, length); + r = strncmp (p1, p2, bound); known_result = true; break; } @@ -2394,9 +2419,9 @@ gimple_fold_builtin_string_compare (gimple_stmt_it break; case BUILT_IN_STRNCASECMP: { - if (length == -1) + if (bound == -1) break; - r = strncmp (p1, p2, length); + r = strncmp (p1, p2, bound); if (r == 0) known_result = true; break; @@ -2412,7 +2437,7 @@ gimple_fold_builtin_string_compare (gimple_stmt_it } } - bool nonzero_length = length >= 1 + bool nonzero_bound = bound >= 1 || fcode == BUILT_IN_STRCMP || fcode == BUILT_IN_STRCMP_EQ || fcode == BUILT_IN_STRCASECMP; @@ -2420,7 +2445,7 @@ gimple_fold_builtin_string_compare (gimple_stmt_it location_t loc = gimple_location (stmt); /* If the second arg is "", return *(const unsigned char*)arg1. */ - if (p2 && *p2 == '\0' && nonzero_length) + if (p2 && *p2 == '\0' && nonzero_bound) { gimple_seq stmts = NULL; tree var = gimple_load_first_char (loc, str1, &stmts); @@ -2435,7 +2460,7 @@ gimple_fold_builtin_string_compare (gimple_stmt_it } /* If the first arg is "", return -*(const unsigned char*)arg2. */ - if (p1 && *p1 == '\0' && nonzero_length) + if (p1 && *p1 == '\0' && nonzero_bound) { gimple_seq stmts = NULL; tree var = gimple_load_first_char (loc, str2, &stmts); @@ -2454,9 +2479,9 @@ gimple_fold_builtin_string_compare (gimple_stmt_it return true; } - /* If len parameter is one, return an expression corresponding to + /* If BOUND is one, return an expression corresponding to (*(const unsigned char*)arg2 - *(const unsigned char*)arg1). */ - if (fcode == BUILT_IN_STRNCMP && length == 1) + if (fcode == BUILT_IN_STRNCMP && bound == 1) { gimple_seq stmts = NULL; tree temp1 = gimple_load_first_char (loc, str1, &stmts); @@ -2480,12 +2505,12 @@ gimple_fold_builtin_string_compare (gimple_stmt_it return true; } - /* If length is larger than the length of one constant string, - replace strncmp with corresponding strcmp */ - if (fcode == BUILT_IN_STRNCMP - && length > 0 - && ((p2 && (size_t) length > strlen (p2)) - || (p1 && (size_t) length > strlen (p1)))) + /* If BOUND is greater than the length of one constant string, + replace strncmp with strcmp. */ + if (fcode == BUILT_IN_STRNCMP + && bound > 0 + && ((p2 && (size_t) bound > len2) + || (p1 && (size_t) bound > len1))) { tree fn = builtin_decl_implicit (BUILT_IN_STRCMP); if (!fn) Index: gcc/testsuite/gcc.dg/strcmpopt_7.c =================================================================== --- gcc/testsuite/gcc.dg/strcmpopt_7.c (nonexistent) +++ gcc/testsuite/gcc.dg/strcmpopt_7.c (working copy) @@ -0,0 +1,85 @@ +/* PR tree-optimization/92501 - strncmp with constant unterminated arrays + not folded + { dg-do compile } + { dg-options "-O2 -Wall -fdump-tree-strlen" } */ + +const char a1[] = { '1' }; +const char a12[] = { '1', '2' }; +const char a112[] = { '1', '1', '2' }; +const char a123[] = { '1', '2', '3' }; + +const char s[] = ""; +const char s1[] = "1"; +const char s12[] = "12"; +const char s112[] = "112"; +const char s123[] = "123"; + +extern void failure_on_line (int); + +#define T(eql, s, t, n) do { \ + if (!(eql __builtin_strncmp (s, t, n))) \ + failure_on_line (__LINE__); \ + } while (0) + + +void test (void) +{ + /* Mixed array and string. */ + T (0 ==, a1, s, 0); + T (0 ==, a1, s1, 0); + T (0 ==, a1, s1, 1); + T (0 ==, a1, s12, 1); + + T (0 !=, a1, s12 + 1, 1); + + T (0 ==, a12, s, 0); + T (0 ==, a12, s1, 0); + T (0 ==, a12, s12, 0); + T (0 ==, a12, s12, 1); + T (0 ==, a12, s12, 2); + T (0 ==, a12, s123, 2); + + T (0 ==, a12 + 0, s123 + 1, 0); + T (0 !=, a12 + 0, s123 + 1, 1); + T (0 !=, a12 + 0, s123 + 1, 2); + T (0 ==, a12 + 1, s123 + 0, 0); + T (0 !=, a12 + 1, s123 + 0, 1); + T (0 ==, a12 + 1, s123 + 1, 1); + T (0 !=, a12 + 1, s123 + 2, 1); + T (0 !=, a12 + 1, s123 + 3, 1); + + /* Both arguments arrays. */ + T (0 ==, a112 + 0, a1, 1); + T (0 ==, a112 + 1, a1, 1); + T (0 !=, a112 + 2, a1, 1); + + T (0 ==, a1, a112 + 0, 1); + T (0 ==, a1, a112 + 1, 1); + T (0 !=, a1, a112 + 2, 1); + + T (0 ==, a112 + 0, a12, 0); + T (0 ==, a112 + 0, a12, 1); + T (0 !=, a112 + 0, a12, 2); + + T (0 ==, a112 + 1, a12, 2); + T (0 !=, a112 + 1, a12 + 1, 1); + T (0 ==, a112 + 2, a12 + 1, 1); + + /* Mixed array and string. */ + T (0 ==, s112 + 0, a12, 0); + T (0 ==, s112 + 0, a12, 1); + T (0 !=, s112 + 0, a12, 2); + + T (0 ==, s112 + 1, a12, 0); + T (0 ==, s112 + 1, a12, 1); + T (0 ==, s112 + 1, a12, 2); + T (0 !=, s112 + 2, a12, 2); + + T (0 ==, a112 + 0, s1, 1); + T (0 ==, a112 + 1, s1, 1); + T (0 !=, a112 + 2, s1, 1); +} + +/* { dg-final { scan-tree-dump-not "strcmp" "strlen1" } } + { dg-final { scan-tree-dump-not "strncmp" "strlen1" } } + { dg-final { scan-tree-dump-not "failure_on_line_" "strlen1" } } */