On Mon, Apr 8, 2013 at 5:10 PM, Joern Rennecke
<joern.renne...@embecosm.com> wrote:
> This is basically the same patch as attached to the PR, except that I
> have changed the goto-loop into a do-while loop with a new comment;
> this caused the need for a lot of reformatting.

Can you please include a testcase that shows the effect of this?

> bootstrapped & regtested on i686-pc-linux-gnu.
>
> 2013-04-08  Joern Rennecke  <joern.renne...@embecosm.com>
>
>         * tree-ssa-math-opts.c (mult_to_fma_pass): New file static struct.
>         (convert_mult_to_fma): In first pass, don't use an fms construct
>         when we don't have an fms operation, but fmna.

It's fnma, I believe.

>         (execute_optimize_widening_mul): Add a second pass if
>         convert_mult_to_fma requests it.
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c    (revision 197578)
> +++ gcc/tree-ssa-math-opts.c    (working copy)
> @@ -2461,6 +2461,12 @@ convert_plusminus_to_widen (gimple_stmt_
>    return true;
>  }
>
> +static struct
> +{
> +  bool second_pass;
> +  bool retry_request;
> +} mult_to_fma_pass;
> +
>  /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
>     with uses in additions and subtractions to form fused multiply-add
>     operations.  Returns true if successful and MUL_STMT should be removed.
> */
> @@ -2570,6 +2576,22 @@ convert_mult_to_fma (gimple mul_stmt, tr
>           return false;
>         }
>
> +      /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
> +        by a MULT_EXPR that we'll visit later, we might be able to
> +        get a more profitable match with fnma.
> +        OTOH, if we don't, a negate / fma pair has likely lower latency
> +        than a mult / subtract pair.  */

This makes it sound as if this is purely an ordering issue; that is, for

  x = a * b;
  y = c * d;
  z = x - y;

instead of

  y = c * d;
  z = a * b + (-y);

you want to generate

  x = a * b;
  z = -(c * d + (-x));

I fail to see why you need two passes for this, rather than handling the
case where the immediate use stmt of the multiplication we start from
combines another multiplication with a MINUS_EXPR.  That is ...

> +      if (use_code == MINUS_EXPR && !negate_p
> +         && gimple_assign_rhs1 (use_stmt) == result
> +         && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
> +         && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing
> +         && mult_to_fma_pass.second_pass == false)

... check whether that is the case here and, if so, simply do not do the
transform.  A second pass will not recover from that without destroying the
fnma pattern (testcase?).

Richard.

> +       {
> +         /* ??? Could make setting of retry_request dependent on some
> +            rtx_cost measure we evaluate beforehand.  */
> +         mult_to_fma_pass.retry_request = true;
> +         return false;
> +       }
>        /* We can't handle a * b + a * b.  */
>        if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
>         return false;
> @@ -2657,76 +2679,89 @@ execute_optimize_widening_mul (void)
>
>    memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
>
> -  FOR_EACH_BB (bb)
> -    {
> -      gimple_stmt_iterator gsi;
>
> -      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> -        {
> -         gimple stmt = gsi_stmt (gsi);
> -         enum tree_code code;
> +  /* We may run one or two passes.  In the first pass, if we have fnma,
> +     but not fms, we don't synthesize fms so that we can get the maximum
> +     matches for fnma.  If we have therefore skipped opportunities to
> +     synthesize fms, we'll run a second pass where we use any such
> +     opportunities that still remain.  */
> +  mult_to_fma_pass.retry_request = false;
> +  do
> +    {
> +      mult_to_fma_pass.second_pass = mult_to_fma_pass.retry_request;
> +      FOR_EACH_BB (bb)
> +       {
> +         gimple_stmt_iterator gsi;
>
> -         if (is_gimple_assign (stmt))
> +         for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
>             {
> -             code = gimple_assign_rhs_code (stmt);
> -             switch (code)
> +             gimple stmt = gsi_stmt (gsi);
> +             enum tree_code code;
> +
> +             if (is_gimple_assign (stmt))
>                 {
> -               case MULT_EXPR:
> -                 if (!convert_mult_to_widen (stmt, &gsi)
> -                     && convert_mult_to_fma (stmt,
> -                                             gimple_assign_rhs1 (stmt),
> -                                             gimple_assign_rhs2 (stmt)))
> +                 code = gimple_assign_rhs_code (stmt);
> +                 switch (code)
>                     {
> -                     gsi_remove (&gsi, true);
> -                     release_defs (stmt);
> -                     continue;
> -                   }
> -                 break;
> -
> -               case PLUS_EXPR:
> -               case MINUS_EXPR:
> -                 convert_plusminus_to_widen (&gsi, stmt, code);
> -                 break;
> +                   case MULT_EXPR:
> +                     if (!convert_mult_to_widen (stmt, &gsi)
> +                         && convert_mult_to_fma (stmt,
> +                                                 gimple_assign_rhs1 (stmt),
> +                                                 gimple_assign_rhs2 (stmt)))
> +                       {
> +                         gsi_remove (&gsi, true);
> +                         release_defs (stmt);
> +                         continue;
> +                       }
> +                     break;
> +
> +                   case PLUS_EXPR:
> +                   case MINUS_EXPR:
> +                     convert_plusminus_to_widen (&gsi, stmt, code);
> +                     break;
>
> -               default:;
> +                   default:;
> +                   }
>                 }
> -           }
> -         else if (is_gimple_call (stmt)
> -                  && gimple_call_lhs (stmt))
> -           {
> -             tree fndecl = gimple_call_fndecl (stmt);
> -             if (fndecl
> -                 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
> +             else if (is_gimple_call (stmt)
> +                      && gimple_call_lhs (stmt))
>                 {
> -                 switch (DECL_FUNCTION_CODE (fndecl))
> +                 tree fndecl = gimple_call_fndecl (stmt);
> +                 if (fndecl
> +                     && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
>                     {
> -                     case BUILT_IN_POWF:
> -                     case BUILT_IN_POW:
> -                     case BUILT_IN_POWL:
> -                       if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
> -                           && REAL_VALUES_EQUAL
> -                                (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
> -                                 dconst2)
> -                           && convert_mult_to_fma (stmt,
> -                                                   gimple_call_arg (stmt, 0),
> -                                                   gimple_call_arg (stmt, 0)))
> -                         {
> -                           unlink_stmt_vdef (stmt);
> -                           if (gsi_remove (&gsi, true)
> -                               && gimple_purge_dead_eh_edges (bb))
> -                             cfg_changed = true;
> -                           release_defs (stmt);
> -                           continue;
> -                         }
> +                     switch (DECL_FUNCTION_CODE (fndecl))
> +                       {
> +                       case BUILT_IN_POWF:
> +                       case BUILT_IN_POW:
> +                       case BUILT_IN_POWL:
> +                         if ((TREE_CODE (gimple_call_arg (stmt, 1))
> +                              == REAL_CST)
> +                             && (REAL_VALUES_EQUAL
> +                                 (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
> +                                  dconst2))
> +                             && (convert_mult_to_fma
> +                                 (stmt, gimple_call_arg (stmt, 0),
> +                                  gimple_call_arg (stmt, 0))))
> +                           {
> +                             unlink_stmt_vdef (stmt);
> +                             if (gsi_remove (&gsi, true)
> +                                 && gimple_purge_dead_eh_edges (bb))
> +                               cfg_changed = true;
> +                             release_defs (stmt);
> +                             continue;
> +                           }
>                           break;
>
> -                     default:;
> +                         default:;
> +                       }
>                     }
>                 }
> +             gsi_next (&gsi);
>             }
> -         gsi_next (&gsi);
>         }
>      }
> +  while (!mult_to_fma_pass.second_pass && mult_to_fma_pass.retry_request);
>
>    statistics_counter_event (cfun, "widening multiplications inserted",
>                             widen_mul_stats.widen_mults_inserted);
>

Reply via email to