Hi,
Entry pr41488 shows a case in which induction variable can't be recognized
or coalesced.  As noted in the bug entry, one possible fix is to teach PRE
not to do some specific transformation.  However, it's in nature a scalar
evolution issue.  Considering below snippet:

   # i_17 = PHI <i_13(5), 0(3)>
   # _20 = PHI <_5(5), start_4(D)(3)>
   ...
   i_13 = i_17 + 1;
   _5 = start_4(D) + i_13;

Variable _20 appears in the form of PEELED chrec like (start_4, _5)_LOOP,
meaning it has value "start_4" in the 1st iteration, then changes to _5 in
following iterations.  PEELED chrec is not implemented by GCC right now, it
can be simplified into either POLYNOMIAL or PERIOD one.  The POLYNOMIAL
chrec is processed by GCC after being recognized, as in the examle, _20 is
actually {start_4, 1}_LOOP.

This patch modifies scalar evolution by trying to simplify PEELED chrec.
Without this patch, generated code for pr41488.c is like:
foo:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        cmp     r1, r2
        bge     .L7
        push    {r4}
        mov     r3, r1
        ldr     r4, [r0]
        adds    r2, r2, #1
        adds    r1, r1, #1
        movs    r0, #0
.L3:
        str     r0, [r4, r3, lsl #2]
        mov     r3, r1
        adds    r1, r1, #1
        cmp     r1, r2
        bne     .L3
        ldr     r4, [sp], #4
.L7:
        bx      lr
        .size   foo, .-foo

Which is improved to:
foo:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        cmp     r1, r2
        bge     .L1
        ldr     r3, [r0]
        movs    r0, #0
        add     r1, r3, r1, lsl #2
        add     r2, r3, r2, lsl #2
.L3:
        str     r0, [r1], #4
        cmp     r1, r2
        bne     .L3
.L1:
        bx      lr
        .size   foo, .-foo

One point needs to be clarified that I use tree_to_aff_combination_expand in
the patch.  Rational is the number of the specific PEELED_CHRECs should be
moderate, I also check the equality literally firstly and resort to affine
facility if that fails.  By measuring bootstrap of gcc and spec2kint, I
collected the number of opportunities caught by this patch like below:
                            literal comparison/affine comparison
GCC                          ~1200/~250
Spec2Kint              93/34

I could back trace the ssa chain instead of using affine functions, but that
would miss some cases.

Bootstrap and test on x86/x86_64/arm.  Is it OK?

Thanks,
bin

2013-12-06  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/41488
        * tree-ssa-loop-ivopts.c (add_old_iv_candidates): Don't add cand
        for PEELED_CHREC kind of IV.
        * tree-scalar-evolution.c: Include necessary header files.
        (peeled_chrec_map, simplify_peeled_chrec): New.
        (analyze_evolution_in_loop): New static variable.
        Call simplify_peeled_chrec.
        (scev_initialize): Initialize peeled_chrec_map.
        (scev_reset, scev_finalize): Reset and release peeled_chrec_map.

gcc/testsuite/ChangeLog
2013-12-06  Bin Cheng  <bin.ch...@arm.com>

        * gcc.dg/tree-ssa/scev-7.c: New test.
        * gcc.dg/pr41488.c: New test.
Index: gcc/tree-scalar-evolution.c
===================================================================
--- gcc/tree-scalar-evolution.c (revision 205688)
+++ gcc/tree-scalar-evolution.c (working copy)
@@ -280,6 +280,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa.h"
 #include "cfgloop.h"
 #include "tree-chrec.h"
+#include "pointer-set.h"
+#include "tree-affine.h"
 #include "tree-scalar-evolution.h"
 #include "dumpfile.h"
 #include "params.h"
@@ -1380,7 +1382,66 @@ follow_ssa_edge (struct loop *loop, gimple def, gi
 }
 
 
+/* Pointer map used when simplifying PEELED_CHREC into POLYNOMIAL_CHREC.  */
+static pointer_map_t *peeled_chrec_map;
 
+/* Simplify PEELED_CHREC represented by (init_cond, arg) in LOOP.
+   Handle below case and return the corresponding POLYNOMIAL_CHREC:
+
+   # i_17 = PHI <i_13(5), 0(3)>
+   # _20 = PHI <_5(5), start_4(D)(3)>
+   ...
+   i_13 = i_17 + 1;
+   _5 = start_4(D) + i_13;
+
+   Though variable _20 appears as a PEELED_CHREC in the form of
+   (start_4, _5)_LOOP, it's a POLYNOMIAL_CHREC like {start_4, 1}_LOOP.
+
+   See PR41488.  */
+
+static tree
+simplify_peeled_chrec (struct loop *loop, tree arg, tree init_cond)
+{
+  aff_tree aff1, aff2;
+  tree ev, left, right, type, step_val;
+
+  ev = instantiate_parameters (loop, analyze_scalar_evolution (loop, arg));
+  if (ev == NULL_TREE || TREE_CODE (ev) != POLYNOMIAL_CHREC)
+    return chrec_dont_know;
+
+  left = CHREC_LEFT (ev);
+  right = CHREC_RIGHT (ev);
+  type = TREE_TYPE (left);
+  step_val = chrec_fold_plus (type, init_cond, right);
+
+  /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
+     if "left" equals to "init + right".  */
+  if (operand_equal_p (left, step_val, 0))
+    {
+      if (dump_file && (dump_flags & TDF_SCEV))
+       fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
+
+      return build_polynomial_chrec (loop->num, init_cond, right);
+    }
+
+  /* Try harder to check if they are equal.  */
+  tree_to_aff_combination_expand (left, type, &aff1, &peeled_chrec_map);
+  tree_to_aff_combination_expand (step_val, type, &aff2, &peeled_chrec_map);
+  left = fold_convert (type, aff_combination_to_tree (&aff1));
+  step_val = fold_convert (type, aff_combination_to_tree (&aff2));
+
+  /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
+     if "left" equals to "init + right".  */
+  if (operand_equal_p (left, step_val, 0))
+    {
+      if (dump_file && (dump_flags & TDF_SCEV))
+       fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
+
+      return build_polynomial_chrec (loop->num, init_cond, right);
+    }
+  return chrec_dont_know;
+}
+
 /* Given a LOOP_PHI_NODE, this function determines the evolution
    function from LOOP_PHI_NODE to LOOP_PHI_NODE in the loop.  */
 
@@ -1392,6 +1453,7 @@ analyze_evolution_in_loop (gimple loop_phi_node,
   tree evolution_function = chrec_not_analyzed_yet;
   struct loop *loop = loop_containing_stmt (loop_phi_node);
   basic_block bb;
+  static bool simplify_peeled_chrec_p = true;
 
   if (dump_file && (dump_flags & TDF_SCEV))
     {
@@ -1442,7 +1504,19 @@ analyze_evolution_in_loop (gimple loop_phi_node,
         all the other iterations it has the value of ARG.
         For the moment, PEELED_CHREC nodes are not built.  */
       if (res != t_true)
-       ev_fn = chrec_dont_know;
+       {
+         ev_fn = chrec_dont_know;
+         /* Try to recognize POLYNOMIAL_CHREC which appears in
+            the form of PEELED_CHREC, but guard the process with
+            a bool variable to keep the analyzer from infinite
+            recurrence for real PEELED_RECs.  */
+         if (simplify_peeled_chrec_p && TREE_CODE (arg) == SSA_NAME)
+           {
+             simplify_peeled_chrec_p = false;
+             ev_fn = simplify_peeled_chrec (loop, arg, init_cond);
+             simplify_peeled_chrec_p = true;
+           }
+       }
 
       /* When there are multiple back edges of the loop (which in fact never
         happens currently, but nevertheless), merge their evolutions.  */
@@ -3086,6 +3160,8 @@ scev_initialize (void)
 
   initialize_scalar_evolutions_analyzer ();
 
+  peeled_chrec_map = pointer_map_create ();
+
   FOR_EACH_LOOP (loop, 0)
     {
       loop->nb_iterations = NULL_TREE;
@@ -3122,6 +3198,12 @@ scev_reset (void)
 
   scev_reset_htab ();
 
+  if (peeled_chrec_map)
+    {
+      pointer_map_destroy (peeled_chrec_map);
+      peeled_chrec_map = NULL;
+    }
+
   if (!current_loops)
     return;
 
@@ -3209,6 +3291,11 @@ scev_finalize (void)
     return;
   htab_delete (scalar_evolution_info);
   scalar_evolution_info = NULL;
+  if (peeled_chrec_map)
+    {
+      pointer_map_destroy (peeled_chrec_map);
+      peeled_chrec_map = NULL;
+    }
 }
 
 /* Returns true if the expression EXPR is considered to be too expensive
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-7.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-7.c      (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-7.c      (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sccp-scev" } */
+
+struct struct_t
+{
+  int* data;
+};
+
+void foo (struct struct_t* sp, int start, int end)
+{
+  int i;
+
+  for (i = 1000; i+start > end; i--)
+    sp->data[i+start] = 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into 
POLYNOMIAL_CHREC" 1 "sccp" } } */
+/* { dg-final { cleanup-tree-dump "sccp" } } */
Index: gcc/testsuite/gcc.dg/pr41488.c
===================================================================
--- gcc/testsuite/gcc.dg/pr41488.c      (revision 0)
+++ gcc/testsuite/gcc.dg/pr41488.c      (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sccp-scev" } */
+
+struct struct_t
+{
+  int* data;
+};
+
+void foo (struct struct_t* sp, int start, int end)
+{
+  int i;
+
+  for (i = 0; i+start < end; i++)
+    sp->data[i+start] = 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into 
POLYNOMIAL_CHREC" 1 "sccp" } } */
+/* { dg-final { cleanup-tree-dump "sccp" } } */
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c  (revision 205688)
+++ gcc/tree-ssa-loop-ivopts.c  (working copy)
@@ -2526,11 +2526,19 @@ add_old_iv_candidates (struct ivopts_data *data, s
       /* Additionally record the possibility of leaving the original iv
         untouched.  */
       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
-      cand = add_candidate_1 (data,
-                             iv->base, iv->step, true, IP_ORIGINAL, NULL,
-                             SSA_NAME_DEF_STMT (def));
-      cand->var_before = iv->ssa_name;
-      cand->var_after = def;
+      /* Don't add candidate if it's from another PHI node because
+         it's an affine iv appearing in the form of PEELED_CHREC.  */
+      phi = SSA_NAME_DEF_STMT (def);
+      if (gimple_code (phi) != GIMPLE_PHI)
+       {
+         cand = add_candidate_1 (data,
+                                 iv->base, iv->step, true, IP_ORIGINAL, NULL,
+                                 SSA_NAME_DEF_STMT (def));
+         cand->var_before = iv->ssa_name;
+         cand->var_after = def;
+       }
+      else
+       gcc_assert (gimple_bb (phi) == data->current_loop->header);
     }
 }
 

Reply via email to