On Thu, Apr 28, 2016 at 10:18 AM, Richard Biener
<richard.guent...@gmail.com> wrote:
> On Wed, Apr 27, 2016 at 5:49 PM, Bin Cheng <bin.ch...@arm.com> wrote:
>> Hi,
>> Currently tree if-conversion only supports PHIs with no more than two 
>> arguments unless the loop is marked with "simd pragma".  This patch makes 
>> such PHIs supported unconditionally if they have no more than 
>> MAX_PHI_ARG_NUM arguments, thus cases like PR56541 can be fixed.  Note 
>> because a chain of "?:" operators are needed to compute mult-arg PHI, this 
>> patch records the case and versions loop so that vectorizer can fall back to 
>> the original loop if if-conversion+vectorization isn't beneficial.  Ideally, 
>> cost computation in vectorizer should be improved to measure benefit against 
>> the original loop, rather than if-converted loop.  So far MAX_PHI_ARG_NUM is 
>> set to (4) because cases with more arguments are rare and not likely 
>> beneficial.
>>
>> Apart from above change, the patch also makes changes like: only split 
>> critical edge when we have to; cleanups code logic in if_convertible_loop_p 
>> about aggressive_if_conv.
>>
>> Bootstrap and test on x86_64 and AArch64, is it OK?
>
> Can you make this magic number a --param please?  Otherwise ok.
Hi,
Here is the updated patch.  I also added a vectorization test case
since PR56541 was reported against it.
Bootstrap & test on x86_64, is it OK?

Thanks,
bin

>
> Thanks,
> Richard.
>
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 67760b5..e26ea96 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -9020,6 +9020,10 @@ Large expressions slow the analyzer.
 Bound on the complexity of the expressions in the scalar evolutions analyzer.
 Complex expressions slow the analyzer.
 
+@item max-tree-if-conversion-phi-args
+Maximum number of arguments in a PHI supported by TREE if conversion
+unless the loop is marked with simd pragma.
+
 @item vect-max-version-for-alignment-checks
 The maximum number of run-time checks that can be performed when
 doing loop versioning for alignment in the vectorizer.
diff --git a/gcc/params.def b/gcc/params.def
index dbff305..754417e 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -530,6 +530,12 @@ DEFPARAM(PARAM_SCEV_MAX_EXPR_COMPLEXITY,
         "Bound on the complexity of the expressions in the scalar evolutions 
analyzer.",
         10, 0, 0)
 
+DEFPARAM (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS,
+         "max-tree-if-conversion-phi-args",
+         "Maximum number of arguments in a PHI supported by TREE if-conversion 
"
+         "unless the loop is marked with simd pragma.",
+         4, 2, 0)
+
 DEFPARAM(PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS,
          "vect-max-version-for-alignment-checks",
          "Bound on number of runtime checks inserted by the vectorizer's loop 
versioning for alignment check.",
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 32ced16..97b62b7 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -113,11 +113,22 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "builtins.h"
 #include "params.h"
- 
+
+/* Only handle PHIs with no more arguments unless we are asked to by
+   simd pragma.  */
+#define MAX_PHI_ARG_NUM \
+  ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))
+
 /* Indicate if new load/store that needs to be predicated is introduced
    during if conversion.  */
 static bool any_pred_load_store;
 
+/* Indicate if there are any complicated PHIs that need to be handled in
+   if-conversion.  Complicated PHI has more than two arguments and can't
+   be degenerated to two arguments PHI.  See more information in comment
+   before phi_convertible_by_degenerating_args.  */
+static bool any_complicated_phi;
+
 /* Hash for struct innermost_loop_behavior.  It depends on the user to
    free the memory.  */
 
@@ -172,9 +183,6 @@ innermost_loop_behavior_hash::equal (const value_type &e1,
 /* List of basic blocks in if-conversion-suitable order.  */
 static basic_block *ifc_bbs;
 
-/* Apply more aggressive (extended) if-conversion if true.  */
-static bool aggressive_if_conv;
-
 /* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
 static hash_map<innermost_loop_behavior_hash,
                data_reference_p> *innermost_DR_map;
@@ -639,13 +647,9 @@ phi_convertible_by_degenerating_args (gphi *phi)
 }
 
 /* Return true when PHI is if-convertible.  PHI is part of loop LOOP
-   and it belongs to basic block BB.
-
-   PHI is not if-convertible if:
-   - it has more than 2 arguments.
-
-   When the aggressive_if_conv is set, PHI can have more than
-   two arguments.  */
+   and it belongs to basic block BB.  Note at this point, it is sure
+   that PHI is if-convertible.  This function updates global variable
+   ANY_COMPLICATED_PHI if PHI is complicated.  */
 
 static bool
 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
@@ -656,17 +660,10 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, 
gphi *phi)
       print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
     }
 
-  if (bb != loop->header)
-    {
-      if (gimple_phi_num_args (phi) > 2
-         && !aggressive_if_conv
-         && !phi_convertible_by_degenerating_args (phi))
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Phi can't be predicated by single cond.\n");
-         return false;
-        }
-    }
+  if (bb != loop->header
+      && gimple_phi_num_args (phi) > 2
+      && !phi_convertible_by_degenerating_args (phi))
+    any_complicated_phi = true;
 
   return true;
 }
@@ -1012,8 +1009,6 @@ has_pred_critical_p (basic_block bb)
    - it is after the exit block but before the latch,
    - its edges are not normal.
 
-   Last restriction is valid if aggressive_if_conv is false.
-
    EXIT_BB is the basic block containing the exit of the LOOP.  BB is
    inside LOOP.  */
 
@@ -1062,19 +1057,6 @@ if_convertible_bb_p (struct loop *loop, basic_block bb, 
basic_block exit_bb)
        return false;
       }
 
-  /* At least one incoming edge has to be non-critical as otherwise edge
-     predicates are not equal to basic-block predicates of the edge
-     source.  This check is skipped if aggressive_if_conv is true.  */
-  if (!aggressive_if_conv
-      && EDGE_COUNT (bb->preds) > 1
-      && bb != loop->header
-      && all_preds_critical_p (bb))
-    {
-      if (dump_file && (dump_flags & TDF_DETAILS))
-       fprintf (dump_file, "only critical predecessors\n");
-      return false;
-    }
-
   return true;
 }
 
@@ -2380,11 +2362,16 @@ version_loop_for_if_conversion (struct loop *loop)
   return true;
 }
 
-/* Performs splitting of critical edges if aggressive_if_conv is true.
-   Returns false if loop won't be if converted and true otherwise.  */
+/* Performs splitting of critical edges.  Skip splitting and return false
+   if LOOP will not be converted because:
+
+     - LOOP is not well formed.
+     - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
+
+   Last restriction is valid only if AGGRESSIVE_IF_CONV is false.  */
 
 static bool
-ifcvt_split_critical_edges (struct loop *loop)
+ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
 {
   basic_block *body;
   basic_block bb;
@@ -2393,30 +2380,51 @@ ifcvt_split_critical_edges (struct loop *loop)
   gimple *stmt;
   edge e;
   edge_iterator ei;
+  vec<edge> critical_edges = vNULL;
 
-  if (num <= 2)
-    return false;
-  if (loop->inner)
-    return false;
-  if (!single_exit (loop))
+  /* Loop is not well formed.  */
+  if (num <= 2 || loop->inner || !single_exit (loop))
     return false;
 
   body = get_loop_body (loop);
   for (i = 0; i < num; i++)
     {
       bb = body[i];
-      if (bb == loop->latch
-         || bb_with_exit_edge_p (loop, bb))
+      if (!aggressive_if_conv
+         && phi_nodes (bb)
+         && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
+       {
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           fprintf (dump_file,
+                    "BB %d has complicated PHI with more than %u args.\n",
+                    bb->index, MAX_PHI_ARG_NUM);
+
+         free (body);
+         critical_edges.release ();
+         return false;
+       }
+      if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
        continue;
+
       stmt = last_stmt (bb);
       /* Skip basic blocks not ending with conditional branch.  */
-      if (!(stmt && gimple_code (stmt) == GIMPLE_COND))
+      if (!stmt || gimple_code (stmt) != GIMPLE_COND)
        continue;
+
       FOR_EACH_EDGE (e, ei, bb->succs)
        if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
-         split_edge (e);
+         critical_edges.safe_push (e);
     }
   free (body);
+
+  while (critical_edges.length () > 0)
+    {
+      e = critical_edges.pop ();
+      /* Don't split if bb can be predicated along non-critical edge.  */
+      if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
+       split_edge (e);
+    }
+
   return true;
 }
 
@@ -2713,12 +2721,16 @@ static unsigned int
 tree_if_conversion (struct loop *loop)
 {
   unsigned int todo = 0;
+  bool aggressive_if_conv;
+
   ifc_bbs = NULL;
   any_pred_load_store = false;
+  any_complicated_phi = false;
 
-  /* Set up aggressive if-conversion for loops marked with simd pragma.  */
+  /* Apply more aggressive if-conversion when loop or its outer loop were
+     marked with simd pragma.  When that's the case, we try to if-convert
+     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
   aggressive_if_conv = loop->force_vectorize;
-  /* Check either outer loop was marked with simd pragma.  */
   if (!aggressive_if_conv)
     {
       struct loop *outer_loop = loop_outer (loop);
@@ -2726,20 +2738,20 @@ tree_if_conversion (struct loop *loop)
        aggressive_if_conv = true;
     }
 
-  if (aggressive_if_conv)
-    if (!ifcvt_split_critical_edges (loop))
-      goto cleanup;
+  if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
+    goto cleanup;
 
   if (!if_convertible_loop_p (loop)
       || !dbg_cnt (if_conversion_tree))
     goto cleanup;
 
-  if (any_pred_load_store
+  if ((any_pred_load_store || any_complicated_phi)
       && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
          || loop->dont_vectorize))
     goto cleanup;
 
-  if (any_pred_load_store && !version_loop_for_if_conversion (loop))
+  if ((any_pred_load_store || any_complicated_phi)
+      && !version_loop_for_if_conversion (loop))
     goto cleanup;
 
   /* Now all statements are if-convertible.  Combine all the basic
@@ -2749,11 +2761,8 @@ tree_if_conversion (struct loop *loop)
 
   /* Delete dead predicate computations and repair tree correspondent
      to bool pattern to delete multiple uses of predicates.  */
-  if (aggressive_if_conv)
-    {
-      ifcvt_local_dce (loop->header);
-      ifcvt_repair_bool_pattern (loop->header);
-    }
+  ifcvt_local_dce (loop->header);
+  ifcvt_repair_bool_pattern (loop->header);
 
   todo |= TODO_cleanup_cfg;
   mark_virtual_operands_for_renaming (cfun);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr56541.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr56541.c
new file mode 100644
index 0000000..bd0aa47
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr56541.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ifcvt-stats" { target *-*-* } } */
+
+float a,b,c,d;
+
+float z[1024]; int ok[1024];
+const float rBig = 150.;
+
+void foo()
+{
+  int i;
+
+  for (i=0; i!=1024; ++i)
+    {
+      float rR = a*z[i];
+      float rL = b*z[i];
+      float rMin = (rR<rL) ? rR : rL;
+      float rMax = (rR<rL) ? rL : rR;
+      rMin = (rMax>0) ? rMin : rBig;
+      rMin = (rMin>0) ? rMin : rMax;
+      ok[i] = rMin-c<rMax+d;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr56541.c 
b/gcc/testsuite/gcc.dg/vect/pr56541.c
new file mode 100644
index 0000000..16b8d7c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr56541.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-require-effective-target vect_condition } */
+
+float a,b,c,d;
+
+float z[1024]; int ok[1024];
+const float rBig = 150.;
+
+void foo()
+{
+  int i;
+
+  for (i=0; i!=1024; ++i)
+    {
+      float rR = a*z[i];
+      float rL = b*z[i];
+      float rMin = (rR<rL) ? rR : rL;
+      float rMax = (rR<rL) ? rL : rR;
+      rMin = (rMax>0) ? rMin : rBig;
+      rMin = (rMin>0) ? rMin : rMax;
+      ok[i] = rMin-c<rMax+d;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

Reply via email to