This is basically the same patch as the one attached to the PR, except
that I have changed the goto loop into a do-while loop with a new
comment; this required a lot of reformatting.

Bootstrapped & regtested on i686-pc-linux-gnu.
2013-04-08  Joern Rennecke  <joern.renne...@embecosm.com>

        * tree-ssa-math-opts.c (mult_to_fma_pass): New file static struct.
        (convert_mult_to_fma): In the first pass, don't use an fms construct
        when we don't have an fms operation, but do have fnma.
        (execute_optimize_widening_mul): Add a second pass if
        convert_mult_to_fma requests it.
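
To illustrate the case the new early return targets (an illustrative
snippet only, not part of the patch): on a target that has fnma but not
fms, eagerly fusing the first multiplication into the subtraction costs
a negate / fma pair and leaves the second multiplication unfused,
whereas deferring it lets the second multiplication match fnma directly:

    /* Illustrative source, assuming a target with fnma but no fms.  */
    double
    f (double a, double b, double c, double d)
    {
      double t = a * b;   /* First pass no longer forces an fms here.  */
      return t - c * d;   /* c * d, visited later, can become fnma (c, d, t).  */
    }

Any fms opportunities skipped this way that still remain are then picked
up by the second pass.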

Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c    (revision 197578)
+++ gcc/tree-ssa-math-opts.c    (working copy)
@@ -2461,6 +2461,12 @@ convert_plusminus_to_widen (gimple_stmt_
   return true;
 }
 
+static struct 
+{
+  bool second_pass;
+  bool retry_request;
+} mult_to_fma_pass;
+
 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
    with uses in additions and subtractions to form fused multiply-add
    operations.  Returns true if successful and MUL_STMT should be removed.  */
@@ -2570,6 +2576,22 @@ convert_mult_to_fma (gimple mul_stmt, tr
          return false;
        }
 
+      /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
+        by a MULT_EXPR that we'll visit later, we might be able to
+        get a more profitable match with fnma.
+        OTOH, if we don't, a negate / fma pair likely has lower latency
+        than a mult / subtract pair.  */
+      if (use_code == MINUS_EXPR && !negate_p
+         && gimple_assign_rhs1 (use_stmt) == result
+         && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
+         && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing
+         && mult_to_fma_pass.second_pass == false)
+       {
+         /* ??? Could make setting of retry_request dependent on some
+            rtx_cost measure we evaluate beforehand.  */
+         mult_to_fma_pass.retry_request = true;
+         return false;
+       }
       /* We can't handle a * b + a * b.  */
       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
        return false;
@@ -2657,76 +2679,89 @@ execute_optimize_widening_mul (void)
 
   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
 
-  FOR_EACH_BB (bb)
-    {
-      gimple_stmt_iterator gsi;
 
-      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
-        {
-         gimple stmt = gsi_stmt (gsi);
-         enum tree_code code;
+  /* We may run one or two passes.  In the first pass, if we have fnma,
+     but not fms, we don't synthesize fms so that we can get the maximum
+     matches for fnma.  If we have therefore skipped opportunities to
+     synthesize fms, we'll run a second pass where we use any such
+     opportunities that still remain.  */
+  mult_to_fma_pass.retry_request = false;
+  do
+    {
+      mult_to_fma_pass.second_pass = mult_to_fma_pass.retry_request;
+      FOR_EACH_BB (bb)
+       {
+         gimple_stmt_iterator gsi;
 
-         if (is_gimple_assign (stmt))
+         for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
            {
-             code = gimple_assign_rhs_code (stmt);
-             switch (code)
+             gimple stmt = gsi_stmt (gsi);
+             enum tree_code code;
+
+             if (is_gimple_assign (stmt))
                {
-               case MULT_EXPR:
-                 if (!convert_mult_to_widen (stmt, &gsi)
-                     && convert_mult_to_fma (stmt,
-                                             gimple_assign_rhs1 (stmt),
-                                             gimple_assign_rhs2 (stmt)))
+                 code = gimple_assign_rhs_code (stmt);
+                 switch (code)
                    {
-                     gsi_remove (&gsi, true);
-                     release_defs (stmt);
-                     continue;
-                   }
-                 break;
-
-               case PLUS_EXPR:
-               case MINUS_EXPR:
-                 convert_plusminus_to_widen (&gsi, stmt, code);
-                 break;
+                   case MULT_EXPR:
+                     if (!convert_mult_to_widen (stmt, &gsi)
+                         && convert_mult_to_fma (stmt,
+                                                 gimple_assign_rhs1 (stmt),
+                                                 gimple_assign_rhs2 (stmt)))
+                       {
+                         gsi_remove (&gsi, true);
+                         release_defs (stmt);
+                         continue;
+                       }
+                     break;
+
+                   case PLUS_EXPR:
+                   case MINUS_EXPR:
+                     convert_plusminus_to_widen (&gsi, stmt, code);
+                     break;
 
-               default:;
+                   default:;
+                   }
                }
-           }
-         else if (is_gimple_call (stmt)
-                  && gimple_call_lhs (stmt))
-           {
-             tree fndecl = gimple_call_fndecl (stmt);
-             if (fndecl
-                 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+             else if (is_gimple_call (stmt)
+                      && gimple_call_lhs (stmt))
                {
-                 switch (DECL_FUNCTION_CODE (fndecl))
+                 tree fndecl = gimple_call_fndecl (stmt);
+                 if (fndecl
+                     && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
                    {
-                     case BUILT_IN_POWF:
-                     case BUILT_IN_POW:
-                     case BUILT_IN_POWL:
-                       if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
-                           && REAL_VALUES_EQUAL
-                                (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
-                                 dconst2)
-                           && convert_mult_to_fma (stmt,
-                                                   gimple_call_arg (stmt, 0),
-                                                   gimple_call_arg (stmt, 0)))
-                         {
-                           unlink_stmt_vdef (stmt);
-                           if (gsi_remove (&gsi, true)
-                               && gimple_purge_dead_eh_edges (bb))
-                             cfg_changed = true;
-                           release_defs (stmt);
-                           continue;
-                         }
+                     switch (DECL_FUNCTION_CODE (fndecl))
+                       {
+                       case BUILT_IN_POWF:
+                       case BUILT_IN_POW:
+                       case BUILT_IN_POWL:
+                         if ((TREE_CODE (gimple_call_arg (stmt, 1))
+                              == REAL_CST)
+                             && (REAL_VALUES_EQUAL
+                                 (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
+                                  dconst2))
+                             && (convert_mult_to_fma
+                                 (stmt, gimple_call_arg (stmt, 0),
+                                  gimple_call_arg (stmt, 0))))
+                           {
+                             unlink_stmt_vdef (stmt);
+                             if (gsi_remove (&gsi, true)
+                                 && gimple_purge_dead_eh_edges (bb))
+                               cfg_changed = true;
+                             release_defs (stmt);
+                             continue;
+                           }
                          break;
 
-                     default:;
+                         default:;
+                       }
                    }
                }
+             gsi_next (&gsi);
            }
-         gsi_next (&gsi);
        }
     }
+  while (!mult_to_fma_pass.second_pass && mult_to_fma_pass.retry_request);
 
   statistics_counter_event (cfun, "widening multiplications inserted",
                            widen_mul_stats.widen_mults_inserted);
