Hi Jakub, 

> -----Original Message-----
> From: Jakub Jelinek <ja...@redhat.com>
> Sent: Tuesday, March 27, 2018 2:40 PM
> To: Richard Biener <rguent...@suse.de>
> Cc: gcc-patches@gcc.gnu.org; Kumar, Venkataramanan
> <venkataramanan.ku...@amd.com>
> Subject: [PATCH] Improve TRUTH_{AND,OR}IF_EXPR expansion (PR rtl-optimization/78200)
> 
> Hi!
> 
> The following patch attempts to improve expansion, if we have code like:
>   <bb 16> [local count: 102513059]:
>   if_conversion_var_52 = MEM[base: st1_22, offset: 0B];
>   if (if_conversion_var_52 < 0)
>     goto <bb 17>; [41.00%]
>   else
>     goto <bb 18>; [59.00%]
> 
> ...
> 
>   <bb 18> [local count: 60482706]:
>   _81 = _11 == 2;
>   _82 = if_conversion_var_52 > 0;
>   _79 = _81 & _82;
>   if (_79 != 0)
>     goto <bb 19>; [29.26%]
>   else
>     goto <bb 20>; [70.74%]
> 
> Here, the single pred of the bb performed a similar comparison to what one
> of the & (or |) operands does, and as & (or |) is not ordered, we can choose
> which operand we'll expand first.  If we expand if_conversion_var_52 > 0
> first, there is a chance that we can reuse flags from the previous comparison.
> The patch does it only if there are no (non-virtual) PHIs on the current bb
> and all stmts before the current condition are TERable, so there is nothing
> in between that would clobber the flags.
> 
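For my own understanding, a hypothetical source shape (invented names) that
could produce the GIMPLE above, once && has been lowered to &, is something
like:

extern void bar (void), baz (void);

void
foo (long *p, int k)
{
  long v = *p;                /* if_conversion_var_52 = MEM[...]  */
  if (v < 0)                  /* bb 16 -> bb 17  */
    bar ();
  else if (k == 2 && v > 0)   /* bb 18: _11 == 2 & v > 0  */
    baz ();
}

Here the flags from comparing v against zero should still be live after the
first branch, so expanding v > 0 first is what allows the reuse.
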
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk if it
> helps for SPEC?  Venkat, do you think you could benchmark it in the setup
> where you've measured the slowdown to see if it helps?  I see the patch
> changes the loop:

Sure, I will benchmark and get back to you.

>       .p2align 4,,10
>       .p2align 3
>  .L11:
> +     jle     .L10
>       cmpl    $2, %eax
> -     jne     .L10
> -     testq   %r8, %r8
> -     jg      .L12
> +     je      .L12
>       .p2align 4,,10
>       .p2align 3
>  .L10:
>       addq    %r9, %rsi
>       cmpq    %rsi, %rdx
>       jbe     .L35
>  .L13:
>       movl    24(%rsi), %eax
>       testl   %eax, %eax
>       jle     .L10
>       movq    (%rsi), %r8
>       testq   %r8, %r8
>       jns     .L11
>       cmpl    $1, %eax
>       jne     .L10
>  .L12:
>       addq    $1, %r10
>       movl    $1, %r11d
>       movq    st5(,%r10,8), %rax
>       movq    %rsi, (%rax)
>       addq    %r9, %rsi
>       movq    %r8, 8(%rax)
>       movq    st5(,%rdi,8), %rax
>       movq    %r8, 16(%rax)
>       cmpq    %rsi, %rdx
>       ja      .L13
> which I assume should be an improvement, since we can save one extra
> comparison.
> 
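If I read the new sequence right, the win is that the jle at .L11 reuses the
flags set by the testq %r8, %r8 near the bottom of the loop, roughly (my
annotation, not part of the patch):

	testq	%r8, %r8	# sets SF/ZF from %r8, clears OF
	jns	.L11		# falls through only when %r8 < 0
 .L11:
	jle	.L10		# same flags: taken when %r8 <= 0
	cmpl	$2, %eax
	je	.L12		# only reached when %r8 > 0; taken when %eax == 2

so the separate testq/jg pair from the old code is no longer needed.
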
> 2018-03-27  Jakub Jelinek  <ja...@redhat.com>
>           Richard Biener  <rgue...@suse.de>
> 
>       PR rtl-optimization/78200
>       * cfgexpand.c (gimple_bb_info_for_bb): New variable.
>       (expand_bb_seq, expand_phi_nodes): New functions.
>       (expand_gimple_cond): Use heuristics if it is desirable to
>       swap TRUTH_{AND,OR}IF_EXPR operands.
>       (expand_gimple_basic_block): Remember GIMPLE il for bbs
>       being expanded or already expanded.
>       (pass_expand::execute): Initialize and then free the
>       gimple_bb_info_for_bb vector.
> 
> --- gcc/cfgexpand.c.jj        2018-02-09 06:44:36.413805556 +0100
> +++ gcc/cfgexpand.c   2018-03-26 13:35:57.536509844 +0200
> @@ -2357,6 +2357,34 @@ label_rtx_for_bb (basic_block bb ATTRIBU
>    return l;
>  }
> 
> +/* Maps blocks to their GIMPLE IL.  */
> +static vec<gimple_bb_info, va_heap, vl_embed> *gimple_bb_info_for_bb;
> +
> +/* Like bb_seq, except that during expansion returns the GIMPLE seq
> +   even for the blocks that have been already expanded or are being
> +   currently expanded.  */
> +
> +static gimple_seq
> +expand_bb_seq (basic_block bb)
> +{
> +  if ((bb->flags & BB_RTL)
> +      && (unsigned) bb->index < vec_safe_length (gimple_bb_info_for_bb))
> +    return (*gimple_bb_info_for_bb)[bb->index].seq;
> +  return bb_seq (bb);
> +}
> +
> +/* Like phi_nodes, except that during expansion returns the GIMPLE PHIs
> +   even for the blocks that have been already expanded or are being
> +   currently expanded.  */
> +
> +static gimple_seq
> +expand_phi_nodes (basic_block bb)
> +{
> +  if ((bb->flags & BB_RTL)
> +      && (unsigned) bb->index < vec_safe_length (gimple_bb_info_for_bb))
> +    return (*gimple_bb_info_for_bb)[bb->index].phi_nodes;
> +  return phi_nodes (bb);
> +}
> 
>  /* A subroutine of expand_gimple_cond.  Given E, a fallthrough edge
>     of a basic block where we just expanded the conditional at the end,
> @@ -2475,6 +2503,65 @@ expand_gimple_cond (basic_block bb, gcon
>                 op0 = gimple_assign_rhs1 (second);
>                 op1 = gimple_assign_rhs2 (second);
>               }
> +
> +           /* We'll expand RTL for op0 first; see if we'd better expand
> +              RTL for op1 first.  Do that if the previous bb ends with
> +              if (x op cst), op1's def_stmt rhs is x op2 cst and there are
> +              no non-virtual PHIs nor non-TERed stmts in BB before
> +              STMT.  */
> +           while (TREE_CODE (op1) == SSA_NAME
> +                  && (code == TRUTH_ANDIF_EXPR || code == TRUTH_ORIF_EXPR)
> +                  && single_pred_p (bb))
> +             {
> +               gimple *def1 = SSA_NAME_DEF_STMT (op1);
> +               if (!is_gimple_assign (def1)
> +                   || (TREE_CODE_CLASS (gimple_assign_rhs_code (def1))
> +                       != tcc_comparison))
> +                 break;
> +
> +               basic_block pred = single_pred (bb);
> +               gimple_seq pred_seq = expand_bb_seq (pred);
> +               gimple_stmt_iterator i = gsi_last (pred_seq);
> +               if (!gsi_end_p (i) && is_gimple_debug (gsi_stmt (i)))
> +                 gsi_prev_nondebug (&i);
> +
> +               gimple *last = gsi_stmt (i);
> +               if (!last
> +                   || gimple_code (last) != GIMPLE_COND
> +                   || gimple_assign_rhs1 (def1) != gimple_cond_lhs (last)
> +                   || !operand_equal_p (gimple_assign_rhs2 (def1),
> +                                        gimple_cond_rhs (last), 0))
> +                 break;
> +
> +               gimple_seq phi_seq = expand_phi_nodes (bb);
> +               i = gsi_start (phi_seq);
> +               if (!gsi_end_p (i)
> +                   && !virtual_operand_p (gimple_phi_result (gsi_stmt (i))))
> +                 break;
> +
> +               /* Check if all stmts before STMT are TERed.  */
> +               gimple_seq cur_seq = expand_bb_seq (bb);
> +               i = gsi_start_nondebug (cur_seq);
> +               int cnt = 100;
> +               while (!gsi_end_p (i) && gsi_stmt (i) != stmt)
> +                 {
> +                   gimple *g = gsi_stmt (i);
> +                   if (!is_gimple_assign (g))
> +                     break;
> +                   if (--cnt == 0)
> +                     break;
> +                   tree lhs = gimple_assign_lhs (g);
> +                   if (TREE_CODE (lhs) != SSA_NAME
> +                       || !get_gimple_for_ssa_name (lhs))
> +                     break;
> +                   gsi_next_nondebug (&i);
> +                 }
> +
> +               if (gsi_stmt (i) != stmt)
> +                 break;
> +
> +               std::swap (op0, op1);
> +               break;
> +             }
>           }
>       }
>      }
> @@ -5501,6 +5588,10 @@ expand_gimple_basic_block (basic_block b
>    if (optimize)
>      reorder_operands (bb);
>    stmts = bb_seq (bb);
> +  if ((unsigned) bb->index >= vec_safe_length (gimple_bb_info_for_bb))
> +    vec_safe_grow_cleared (gimple_bb_info_for_bb, bb->index + 1);
> +  (*gimple_bb_info_for_bb)[bb->index].seq = stmts;
> +  (*gimple_bb_info_for_bb)[bb->index].phi_nodes = phi_nodes (bb);
>    bb->il.gimple.seq = NULL;
>    bb->il.gimple.phi_nodes = NULL;
>    rtl_profile_for_bb (bb);
> @@ -6419,6 +6510,8 @@ pass_expand::execute (function *fun)
>        >= PARAM_VALUE (PARAM_MAX_DEBUG_MARKER_COUNT))
>      cfun->debug_nonbind_markers = false;
> 
> +  vec_safe_grow_cleared (gimple_bb_info_for_bb, n_basic_blocks_for_fn (cfun));
> +
>    lab_rtx_for_bb = new hash_map<basic_block, rtx_code_label *>;
>    FOR_BB_BETWEEN (bb, init_block->next_bb, EXIT_BLOCK_PTR_FOR_FN (fun),
>                    next_bb)
> @@ -6433,6 +6526,8 @@ pass_expand::execute (function *fun)
>        deep_ter_debug_map = NULL;
>      }
> 
> +  vec_free (gimple_bb_info_for_bb);
> +
>    /* Free stuff we no longer need after GIMPLE optimizations.  */
>    free_dominance_info (CDI_DOMINATORS);
>    free_dominance_info (CDI_POST_DOMINATORS);
> 
>       Jakub

Regards,
Venkat.
