On October 13, 2017 9:43:10 PM GMT+02:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >First of all, there was a typo, we are optimizing >(x != 0) ? x + y : y >into x + y rather than y. > >And, as the comment mentions, the condition that there is just a single >stmt is too restrictive and in the various patterns posted in the >various >rotate related PRs there are cases where in addition to the last >assign stmt there are some preparation statements (sometimes >conversion, >sometimes bit masking with constant, sometimes both). >I think it is undesirable to turn the conditional code into always >executed >one if it is too large, so this patch allows just very few very cheap >preparation statements which feed each other and it is easily possible >to >propagate the cond_rhs constant through those statements to compute >what >the result from those would be (and make sure no UB). > >With this patch on top of the previous one, on rotate-8.c testcase >on x86_64-linux and i686-linux we get the most efficient code in all >cases >- just a rol/ror instruction with perhaps some moves into registers >needed >to perform that, but without any conditionals, masking etc. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Richard. >2017-10-13 Jakub Jelinek <ja...@redhat.com> > > PR middle-end/62263 > PR middle-end/82498 > * tree-ssa-phiopt.c (value_replacement): Comment fix. Handle > up to 2 preparation statements for ASSIGN in MIDDLE_BB. > > * c-c++-common/rotate-8.c: Expect no PHIs in optimized dump. > >--- gcc/tree-ssa-phiopt.c.jj 2017-10-10 22:04:08.000000000 +0200 >+++ gcc/tree-ssa-phiopt.c 2017-10-13 17:55:01.578450763 +0200 >@@ -995,11 +995,13 @@ value_replacement (basic_block cond_bb, > > } > >- /* Now optimize (x != 0) ? x + y : y to just y. >- The following condition is too restrictive, there can easily be >another >- stmt in middle_bb, for instance a CONVERT_EXPR for the second >argument. */ >- gimple *assign = last_and_only_stmt (middle_bb); >- if (!assign || gimple_code (assign) != GIMPLE_ASSIGN >+ /* Now optimize (x != 0) ? x + y : y to just x + y. */ >+ gsi = gsi_last_nondebug_bb (middle_bb); >+ if (gsi_end_p (gsi)) >+ return 0; >+ >+ gimple *assign = gsi_stmt (gsi); >+ if (!is_gimple_assign (assign) > || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS > || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)) > && !POINTER_TYPE_P (TREE_TYPE (arg0)))) >@@ -1009,6 +1011,71 @@ value_replacement (basic_block cond_bb, > if (!gimple_seq_empty_p (phi_nodes (middle_bb))) > return 0; > >+ /* Allow up to 2 cheap preparation statements that prepare argument >+ for assign, e.g.: >+ if (y_4 != 0) >+ goto <bb 3>; >+ else >+ goto <bb 4>; >+ <bb 3>: >+ _1 = (int) y_4; >+ iftmp.0_6 = x_5(D) r<< _1; >+ <bb 4>: >+ # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)> >+ or: >+ if (y_3(D) == 0) >+ goto <bb 4>; >+ else >+ goto <bb 3>; >+ <bb 3>: >+ y_4 = y_3(D) & 31; >+ _1 = (int) y_4; >+ _6 = x_5(D) r<< _1; >+ <bb 4>: >+ # _2 = PHI <x_5(D)(2), _6(3)> */ >+ gimple *prep_stmt[2] = { NULL, NULL }; >+ int prep_cnt; >+ for (prep_cnt = 0; ; prep_cnt++) >+ { >+ gsi_prev_nondebug (&gsi); >+ if (gsi_end_p (gsi)) >+ break; >+ >+ gimple *g = gsi_stmt (gsi); >+ if (gimple_code (g) == GIMPLE_LABEL) >+ break; >+ >+ if (prep_cnt == 2 || !is_gimple_assign (g)) >+ return 0; >+ >+ tree lhs = gimple_assign_lhs (g); >+ tree rhs1 = gimple_assign_rhs1 (g); >+ use_operand_p use_p; >+ gimple *use_stmt; >+ if (TREE_CODE (lhs) != SSA_NAME >+ || TREE_CODE (rhs1) != SSA_NAME >+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) >+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) >+ || !single_imm_use (lhs, &use_p, &use_stmt) >+ || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign)) >+ return 0; >+ switch (gimple_assign_rhs_code (g)) >+ { >+ CASE_CONVERT: >+ break; >+ case PLUS_EXPR: >+ case BIT_AND_EXPR: >+ case BIT_IOR_EXPR: >+ case BIT_XOR_EXPR: >+ if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST) >+ return 0; >+ break; >+ default: >+ return 0; >+ } >+ prep_stmt[prep_cnt] = g; >+ } >+ > /* Only transform if it removes the condition. */ >if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), >e0, e1)) > return 0; >@@ -1019,7 +1086,7 @@ value_replacement (basic_block cond_bb, > && profile_status_for_fn (cfun) != PROFILE_ABSENT >&& EDGE_PRED (middle_bb, 0)->probability < profile_probability::even () > /* If assign is cheap, there is no point avoiding it. */ >- && estimate_num_insns (assign, &eni_time_weights) >+ && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights) > >= 3 * estimate_num_insns (cond, &eni_time_weights)) > return 0; > >@@ -1030,6 +1097,32 @@ value_replacement (basic_block cond_bb, > tree cond_lhs = gimple_cond_lhs (cond); > tree cond_rhs = gimple_cond_rhs (cond); > >+ /* Propagate the cond_rhs constant through preparation stmts, >+ make sure UB isn't invoked while doing that. */ >+ for (int i = prep_cnt - 1; i >= 0; --i) >+ { >+ gimple *g = prep_stmt[i]; >+ tree grhs1 = gimple_assign_rhs1 (g); >+ if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1)) >+ return 0; >+ cond_lhs = gimple_assign_lhs (g); >+ cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs); >+ if (TREE_CODE (cond_rhs) != INTEGER_CST >+ || TREE_OVERFLOW (cond_rhs)) >+ return 0; >+ if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS) >+ { >+ cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs, >+ gimple_assign_rhs2 (g)); >+ if (TREE_OVERFLOW (cond_rhs)) >+ return 0; >+ } >+ cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs); >+ if (TREE_CODE (cond_rhs) != INTEGER_CST >+ || TREE_OVERFLOW (cond_rhs)) >+ return 0; >+ } >+ > if (((code == NE_EXPR && e1 == false_edge) > || (code == EQ_EXPR && e1 == true_edge)) > && arg0 == lhs >@@ -1071,7 +1164,15 @@ value_replacement (basic_block cond_bb, > duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires), > phires_range_info); > } >- gimple_stmt_iterator gsi_from = gsi_for_stmt (assign); >+ gimple_stmt_iterator gsi_from; >+ for (int i = prep_cnt - 1; i >= 0; --i) >+ { >+ tree plhs = gimple_assign_lhs (prep_stmt[i]); >+ SSA_NAME_RANGE_INFO (plhs) = NULL; >+ gsi_from = gsi_for_stmt (prep_stmt[i]); >+ gsi_move_before (&gsi_from, &gsi); >+ } >+ gsi_from = gsi_for_stmt (assign); > gsi_move_before (&gsi_from, &gsi); > replace_phi_edge_with_variable (cond_bb, e1, phi, lhs); > return 2; >--- gcc/testsuite/c-c++-common/rotate-8.c.jj 2017-10-13 >15:55:32.000000000 +0200 >+++ gcc/testsuite/c-c++-common/rotate-8.c 2017-10-13 17:57:32.861627651 >+0200 >@@ -3,6 +3,7 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ >/* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } } >*/ >+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */ > > unsigned int > f1 (unsigned int x, unsigned char y) > > Jakub