Hi, This patch folds strstr (s, t) eq/ne s to strcmp (s, t) eq/ne 0 if strlen (t) is known. One issue I came across was forwprop1 reverses the order of operands in eq_expr below:
eg test-case: _Bool f(char *s, int cond) { char *t1 = __builtin_strstr (s, "hello"); _Bool t2 = (t1 == s); return t2; } forwprop1 dump: f (char * s, int cond) { _Bool t2; char * t1; <bb 2> [0.0%]: t1_3 = __builtin_strstr (s_2(D), "hello"); t2_4 = s_2(D) == t1_3; return t2_4; } So I had to check if SSA_NAME_DEF_STMT (rhs2) was call to strstr rather than rhs1. I suppose that's OK ? clang unconditionally transforms strstr (s, t) == s to strncmp (s, t, strlen (t)) However I am not sure what algorithm glibc's strstr uses, so didn't attempt to transform if strlen (t) is unknown. Should we do the transform even if strlen (t) is unknown ? Thanks, Prathamesh
2016-12-05 Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> * tree-ssa-strlen.c (strlen_optimize_stmt): Fold strstr(s, t) == s to strcmp (s, t) == 0. (pass_data_strlen): Set todo_flags_finish to TODO_update_ssa. testsuite/ * gcc.dg/strlenopt-30.c: New test-case. diff --git a/gcc/testsuite/gcc.dg/strlenopt-30.c b/gcc/testsuite/gcc.dg/strlenopt-30.c new file mode 100644 index 0000000..737f37d --- /dev/null +++ b/gcc/testsuite/gcc.dg/strlenopt-30.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-strlen" } */ + +_Bool f1(char *s) +{ + char *t = "hello"; + char *t1 = __builtin_strstr (s, t); + _Bool t2 = (t1 == s); + return t2; +} + +_Bool f2(char *s) +{ + char *t = "hello"; + char *t1 = __builtin_strstr (s, t); + _Bool t2 = (t1 != s); + return t2; +} + +_Bool f3(char *s, char *t) +{ + char *t1 = __builtin_strstr (s, t); + _Bool t2 = (t1 == s); + return t2; +} + +/* { dg-final { scan-tree-dump-times "__builtin_strcmp" 2 "strlen" } } */ diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c index 339812e..8977e80 100644 --- a/gcc/tree-ssa-strlen.c +++ b/gcc/tree-ssa-strlen.c @@ -2302,6 +2302,55 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi) else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) handle_pointer_plus (gsi); } + + /* Fold strstr (s, t) == s to strcmp (s, t) == 0. if strlen (t) + is known. */ + else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (TREE_TYPE (lhs))) + { + enum tree_code code = gimple_assign_rhs_code (stmt); + if (code == EQ_EXPR || code == NE_EXPR) + { + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + if (TREE_CODE (rhs2) == SSA_NAME) + { + gcall *call_stmt = dyn_cast<gcall *> (SSA_NAME_DEF_STMT (rhs2)); + if (call_stmt + && gimple_call_builtin_p (call_stmt, BUILT_IN_STRSTR)) + { + tree arg0 = gimple_call_arg (call_stmt, 0); + if (operand_equal_p (arg0, rhs1, 0)) + { + /* Check if strlen(arg1) is known. */ + tree arg1 = gimple_call_arg (call_stmt, 1); + int idx = get_stridx (arg1); + strinfo *si = NULL; + if (idx) + si = get_strinfo (idx); + if ((idx < 0) + || (si && (get_string_length (si) != NULL_TREE))) + { + gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt); + tree strcmp_decl = builtin_decl_explicit (BUILT_IN_STRCMP); + gcall *strcmp_call = gimple_build_call (strcmp_decl, 2, + arg0, arg1); + tree strcmp_lhs = make_ssa_name (integer_type_node); + gimple_call_set_lhs (strcmp_call, strcmp_lhs); + update_stmt (strcmp_call); + gsi_remove (&gsi, true); + gsi_insert_before (&gsi, strcmp_call, GSI_SAME_STMT); + + gsi = gsi_for_stmt (stmt); + tree zero = build_zero_cst (TREE_TYPE (strcmp_lhs)); + gassign *ga = gimple_build_assign (lhs, code, + strcmp_lhs, zero); + gsi_replace (&gsi, ga, false); + } + } + } + } + } + } else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs)) { tree type = TREE_TYPE (lhs); @@ -2505,7 +2554,7 @@ const pass_data pass_data_strlen = 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ + TODO_update_ssa, /* todo_flags_finish */ }; class pass_strlen : public gimple_opt_pass