Currently BB vectorization computes all dependences inside a BB
region and fails all vectorization if it cannot handle some of them.

This is obviously not needed - BB vectorization can restrict the
dependence tests to those that are needed to apply the load/store
motion effectively performed by the vectorization (sinking all
participating loads/stores to the place of the last one).

Restructuring it that way also makes it easy to not give up completely
but only on those SLP instances we cannot vectorize (this gives
a slight bump in my SPEC CPU 2006 testing to 756 vectorized basic
block regions).

But first and foremost this patch is to reduce the dependence analysis
cost and somewhat mitigate the compile-time effects of the first patch.

For fixing PR56118 only a cost model issue remains.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-11-09  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/56118
        * tree-vectorizer.h (vect_find_last_scalar_stmt_in_slp): Declare.
        * tree-vect-slp.c (vect_find_last_scalar_stmt_in_slp): Export.
        * tree-vect-data-refs.c (vect_slp_analyze_node_dependences): New
        function.
        (vect_slp_analyze_data_ref_dependences): Instead of computing
        all dependences of the region DRs just analyze the code motions
        SLP vectorization will perform.  Remove SLP instances that
        cannot have their store/load motions applied.
        (vect_analyze_data_refs): Allow DRs without a vectype
        in BB vectorization.

        * gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c: Adjust.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h.orig  2015-11-09 11:01:55.688175321 +0100
--- gcc/tree-vectorizer.h       2015-11-09 11:02:18.987432840 +0100
*************** extern void vect_detect_hybrid_slp (loop
*** 1075,1080 ****
--- 1075,1081 ----
  extern void vect_get_slp_defs (vec<tree> , slp_tree,
                               vec<vec<tree> > *, int);
  extern bool vect_slp_bb (basic_block);
+ extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree);
  
  /* In tree-vect-patterns.c.  */
  /* Pattern recognition functions.
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig      2015-11-09 10:22:33.140125722 +0100
--- gcc/tree-vect-data-refs.c   2015-11-09 11:33:05.503874719 +0100
*************** vect_slp_analyze_data_ref_dependence (st
*** 581,586 ****
--- 581,629 ----
  }
  
  
+ /* Analyze dependences involved in the transform of SLP NODE.  */
+ 
+ static bool
+ vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node)
+ {
+   /* This walks over all stmts involved in the SLP load/store done
+      in NODE verifying we can sink them up to the last stmt in the
+      group.  */
+   gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
+   for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
+     {
+       gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
+       if (access == last_access)
+       continue;
+       stmt_vec_info access_stmt_info = vinfo_for_stmt (access);
+       gimple_stmt_iterator gsi = gsi_for_stmt (access);
+       gsi_next (&gsi);
+       for (; gsi_stmt (gsi) != last_access; gsi_next (&gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+         if (!STMT_VINFO_DATA_REF (stmt_info)
+             || (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
+                 && DR_IS_READ (STMT_VINFO_DATA_REF (access_stmt_info))))
+           continue;
+ 
+         ddr_p ddr = initialize_data_dependence_relation
+             (STMT_VINFO_DATA_REF (access_stmt_info),
+              STMT_VINFO_DATA_REF (stmt_info), vNULL);
+         if (vect_slp_analyze_data_ref_dependence (ddr))
+           {
+             /* ???  If the dependence analysis failed we can resort to the
+                alias oracle which can handle more kinds of stmts.  */
+             free_dependence_relation (ddr);
+             return false;
+           }
+         free_dependence_relation (ddr);
+       }
+     }
+   return true;
+ }
+ 
+ 
  /* Function vect_analyze_data_ref_dependences.
  
     Examine all the data references in the basic-block, and make sure there
*************** vect_slp_analyze_data_ref_dependence (st
*** 590,610 ****
  bool
  vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
  {
-   struct data_dependence_relation *ddr;
-   unsigned int i;
- 
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
                       "=== vect_slp_analyze_data_ref_dependences ===\n");
  
!   if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
!                               &BB_VINFO_DDRS (bb_vinfo),
!                               vNULL, true))
!     return false;
  
!   FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
!     if (vect_slp_analyze_data_ref_dependence (ddr))
!       return false;
  
    return true;
  }
--- 633,677 ----
  bool
  vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
  {
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
                       "=== vect_slp_analyze_data_ref_dependences ===\n");
  
!   slp_instance instance;
!   slp_tree load;
!   unsigned int i, j;
!   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
!     {
!       bool remove = false;
!       /* Verify we can sink loads to the vectorized stmt insert location.  */
!       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, load)
!       if (! vect_slp_analyze_node_dependences (instance, load))
!         {
!           remove = true;
!           break;
!         }
!       /* Verify we can sink stores to the vectorized stmt insert location.  */
!       slp_tree store = SLP_INSTANCE_TREE (instance);
!       if (!remove
!         && STMT_VINFO_DATA_REF
!               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0]))
!         && ! vect_slp_analyze_node_dependences (instance, store))
!       remove = true;
!       if (remove)
!       {
!         dump_printf_loc (MSG_NOTE, vect_location,
!                          "removing SLP instance operations starting from: ");
!         dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
!                           SLP_TREE_SCALAR_STMTS
!                             (SLP_INSTANCE_TREE (instance))[0], 0);
!         vect_free_slp_instance (instance);
!         BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
!       }
!       i++;
!     }
  
!   if (!BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
!     return false;
  
    return true;
  }
*************** again:
*** 3715,3721 ****
              }
  
            if (is_a <bb_vec_info> (vinfo))
!           break;
  
          if (gatherscatter != SG_NONE || simd_lane_access)
            {
--- 3782,3793 ----
              }
  
            if (is_a <bb_vec_info> (vinfo))
!           {
!             /* No vector type is fine, the ref can still participate
!                in dependence analysis, we just can't vectorize it.  */
!             STMT_VINFO_VECTORIZABLE (stmt_info) = false;
!             continue;
!           }
  
          if (gatherscatter != SG_NONE || simd_lane_access)
            {
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c.orig    2015-11-09 11:01:55.720175675 +0100
--- gcc/tree-vect-slp.c 2015-11-09 11:02:19.004433028 +0100
*************** vect_supported_load_permutation_p (slp_i
*** 1426,1432 ****
  
  /* Find the last store in SLP INSTANCE.  */
  
! static gimple *
  vect_find_last_scalar_stmt_in_slp (slp_tree node)
  {
    gimple *last = NULL, *stmt;
--- 1426,1432 ----
  
  /* Find the last store in SLP INSTANCE.  */
  
! gimple *
  vect_find_last_scalar_stmt_in_slp (slp_tree node)
  {
    gimple *last = NULL, *stmt;
Index: gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c.orig 2015-11-06 12:11:17.347076131 +0100
--- gcc/testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c      2015-11-09 11:02:19.061433658 +0100
*************** A sum(A a,A b)
*** 13,16 ****
    return a;
  }
  
! /* { dg-final { scan-tree-dump-times "not vectorized: more than one data ref in stmt" 0 "slp2" } } */
--- 13,16 ----
    return a;
  }
  
! /* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */

Reply via email to