This patch records the base alignment in data_reference, to avoid the
second-guessing that was previously done in vect_compute_data_ref_alignment.
It also makes vect_analyze_data_refs use dr_analyze_innermost, instead
of having an almost-copy of the same code.

I'd originally tried to do the second part as a standalone patch,
but on its own it caused us to miscompute the base alignment (due to
the second-guessing not quite working).  This was previously latent
because the old code set STMT_VINFO_DR_ALIGNED_TO to a byte value,
whereas it should have been bits.

After the previous patch, the only thing that dr_analyze_innermost
read from the dr was the DR_REF.  I thought it would be better to pass
that in and make data_reference write-only.  This means that callers
like vect_analyze_data_refs don't have to set any fields first
(and thus not worry about *which* fields they should set).

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2017-06-28  Richard Sandiford  <richard.sandif...@linaro.org>

gcc/
        * tree-data-ref.h (data_reference): Add a base_alignment field.
        (DR_BASE_ALIGNMENT): New macro.
        (dr_analyze_innermost): Add a tree argument.
        * tree-data-ref.c: Include builtins.h.
        (dr_analyze_innermost): Take the tree reference as argument.
        Set up DR_BASE_ALIGNMENT.
        (create_data_ref): Update call accordingly.
        * tree-predcom.c (find_looparound_phi): Likewise.
        * tree-vectorizer.h (_stmt_vec_info): Add dr_base_alignment.
        (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
        * tree-vect-data-refs.c: Include tree-cfg.h.
        (vect_compute_data_ref_alignment): Use DR_BASE_ALIGNMENT instead
        of calculating an alignment here.
        (vect_analyze_data_refs): Use dr_analyze_innermost.  Record the
        base alignment in STMT_VINFO_DR_BASE_ALIGNMENT.

Index: gcc/tree-data-ref.h
===================================================================
--- gcc/tree-data-ref.h 2017-06-26 19:41:19.549571836 +0100
+++ gcc/tree-data-ref.h 2017-06-28 14:26:19.651051322 +0100
@@ -119,6 +119,10 @@ struct data_reference
   /* True when the data reference is in RHS of a stmt.  */
   bool is_read;
 
+  /* The alignment of INNERMOST.base_address, in bits.  This is logically
+     part of INNERMOST, but is kept here to avoid unnecessary padding.  */
+  unsigned int base_alignment;
+
   /* Behavior of the memory reference in the innermost loop.  */
   struct innermost_loop_behavior innermost;
 
@@ -139,6 +143,7 @@ #define DR_NUM_DIMENSIONS(DR)      DR_AC
 #define DR_IS_READ(DR)             (DR)->is_read
 #define DR_IS_WRITE(DR)            (!DR_IS_READ (DR))
 #define DR_BASE_ADDRESS(DR)        (DR)->innermost.base_address
+#define DR_BASE_ALIGNMENT(DR)      (DR)->base_alignment
 #define DR_OFFSET(DR)              (DR)->innermost.offset
 #define DR_INIT(DR)                (DR)->innermost.init
 #define DR_STEP(DR)                (DR)->innermost.step
@@ -322,7 +327,7 @@ #define DDR_DIST_VECT(DDR, I) \
 #define DDR_REVERSED_P(DDR) (DDR)->reversed_p
 
 
-bool dr_analyze_innermost (struct data_reference *, struct loop *);
+bool dr_analyze_innermost (struct data_reference *, tree, struct loop *);
 extern bool compute_data_dependences_for_loop (struct loop *, bool,
                                               vec<loop_p> *,
                                               vec<data_reference_p> *,
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c 2017-06-28 14:26:12.946306736 +0100
+++ gcc/tree-data-ref.c 2017-06-28 14:26:19.651051322 +0100
@@ -94,6 +94,7 @@ Software Foundation; either version 3, o
 #include "dumpfile.h"
 #include "tree-affine.h"
 #include "params.h"
+#include "builtins.h"
 
 static struct datadep_stats
 {
@@ -749,7 +750,7 @@ canonicalize_base_object_address (tree a
   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 }
 
-/* Analyze the behavior of memory reference DR.  There are two modes:
+/* Analyze the behavior of memory reference REF.  There are two modes:
 
    - BB analysis.  In this case we simply split the address into base,
      init and offset components, without reference to any containing loop.
@@ -767,12 +768,13 @@ canonicalize_base_object_address (tree a
    dummy outermost loop.  In other cases perform loop analysis.
 
    Return true if the analysis succeeded and store the results in DR if so.
-   BB analysis can only fail for bitfield or reversed-storage accesses.  */
+   BB analysis can only fail for bitfield or reversed-storage accesses.
+
+   Note that DR is purely an output; the contents on input don't matter.  */
 
 bool
-dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
+dr_analyze_innermost (struct data_reference *dr, tree ref, struct loop *loop)
 {
-  tree ref = DR_REF (dr);
   HOST_WIDE_INT pbitsize, pbitpos;
   tree base, poffset;
   machine_mode pmode;
@@ -802,6 +804,7 @@ dr_analyze_innermost (struct data_refere
       return false;
     }
 
+  unsigned int base_alignment = get_object_alignment (base);
   if (TREE_CODE (base) == MEM_REF)
     {
       if (!integer_zerop (TREE_OPERAND (base, 1)))
@@ -858,16 +861,31 @@ dr_analyze_innermost (struct data_refere
 
   init = ssize_int (pbitpos / BITS_PER_UNIT);
   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
-  init =  size_binop (PLUS_EXPR, init, dinit);
+
+  /* If the stripped offset has a lower alignment than the original base,
+     we need to adjust the alignment of the new base down to match.  */
+  unsigned int dinit_bits_low = ((unsigned int) TREE_INT_CST_LOW (dinit)
+                                * BITS_PER_UNIT);
+  if (dinit_bits_low != 0)
+    base_alignment = MIN (base_alignment, dinit_bits_low & -dinit_bits_low);
+  init = size_binop (PLUS_EXPR, init, dinit);
+
   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
-  init =  size_binop (PLUS_EXPR, init, dinit);
+  init = size_binop (PLUS_EXPR, init, dinit);
 
   step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));
 
-  DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
+  base = canonicalize_base_object_address (base_iv.base);
+
+  /* See if get_pointer_alignment can guarantee a higher alignment than
+     the one we calculated above.  */
+  unsigned int alt_alignment = get_pointer_alignment (base);
+  base_alignment = MAX (base_alignment, alt_alignment);
 
+  DR_BASE_ADDRESS (dr) = base;
+  DR_BASE_ALIGNMENT (dr) = base_alignment;
   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
   DR_INIT (dr) = init;
   DR_STEP (dr) = step;
@@ -1071,7 +1089,7 @@ create_data_ref (loop_p nest, loop_p loo
   DR_REF (dr) = memref;
   DR_IS_READ (dr) = is_read;
 
-  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);
+  dr_analyze_innermost (dr, memref, nest != NULL ? loop : NULL);
   dr_analyze_indices (dr, nest, loop);
   dr_analyze_alias (dr);
 
Index: gcc/tree-predcom.c
===================================================================
--- gcc/tree-predcom.c  2017-05-18 07:51:11.896799736 +0100
+++ gcc/tree-predcom.c  2017-06-28 14:26:19.651051322 +0100
@@ -1149,7 +1149,7 @@ find_looparound_phi (struct loop *loop,
   memset (&init_dr, 0, sizeof (struct data_reference));
   DR_REF (&init_dr) = init_ref;
   DR_STMT (&init_dr) = phi;
-  if (!dr_analyze_innermost (&init_dr, loop))
+  if (!dr_analyze_innermost (&init_dr, init_ref, loop))
     return NULL;
 
   if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h       2017-06-26 19:41:19.549571836 +0100
+++ gcc/tree-vectorizer.h       2017-06-28 14:26:19.653051245 +0100
@@ -553,7 +553,8 @@ typedef struct _stmt_vec_info {
   struct data_reference *data_ref_info;
 
   /* Information about the data-ref relative to this loop
-     nest (the loop that is being considered for vectorization).  */
+     nest (the loop that is being considered for vectorization).
+     See also dr_base_alignment.  */
   tree dr_base_address;
   tree dr_init;
   tree dr_offset;
@@ -652,6 +653,10 @@ typedef struct _stmt_vec_info {
 
   /* The number of scalar stmt references from active SLP instances.  */
   unsigned int num_slp_uses;
+
+  /* Logically belongs with dr_base_address etc., but is kept here to
+     avoid unnecessary padding.  */
+  unsigned int dr_base_alignment;
 } *stmt_vec_info;
 
 /* Information about a gather/scatter call.  */
@@ -711,6 +716,7 @@ #define STMT_VINFO_DR_INIT(S)
 #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_offset
 #define STMT_VINFO_DR_STEP(S)              (S)->dr_step
 #define STMT_VINFO_DR_ALIGNED_TO(S)        (S)->dr_aligned_to
+#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_base_alignment
 
 #define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p
 #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c   2017-06-28 14:25:58.811888377 +0100
+++ gcc/tree-vect-data-refs.c   2017-06-28 14:26:19.652051284 +0100
@@ -50,6 +50,7 @@ Software Foundation; either version 3, o
 #include "expr.h"
 #include "builtins.h"
 #include "params.h"
+#include "tree-cfg.h"
 
 /* Return true if load- or store-lanes optab OPTAB is implemented for
    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
@@ -672,6 +673,7 @@ vect_compute_data_ref_alignment (struct
   tree aligned_to;
   tree step;
   unsigned HOST_WIDE_INT alignment;
+  unsigned int base_alignment;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -687,6 +689,7 @@ vect_compute_data_ref_alignment (struct
     misalign = DR_INIT (dr);
   aligned_to = DR_ALIGNED_TO (dr);
   base_addr = DR_BASE_ADDRESS (dr);
+  base_alignment = DR_BASE_ALIGNMENT (dr);
   vectype = STMT_VINFO_VECTYPE (stmt_info);
 
   /* In case the dataref is in an inner-loop of the loop that is being
@@ -708,6 +711,7 @@ vect_compute_data_ref_alignment (struct
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
+         base_alignment = STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info);
         }
       else
        {
@@ -738,40 +742,6 @@ vect_compute_data_ref_alignment (struct
        }
     }
 
-  /* To look at alignment of the base we have to preserve an inner MEM_REF
-     as that carries alignment information of the actual access.  */
-  base = ref;
-  while (handled_component_p (base))
-    base = TREE_OPERAND (base, 0);
-  unsigned int base_alignment = 0;
-  unsigned HOST_WIDE_INT base_bitpos;
-  get_object_alignment_1 (base, &base_alignment, &base_bitpos);
-  /* As data-ref analysis strips the MEM_REF down to its base operand
-     to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
-     adjust things to make base_alignment valid as the alignment of
-     DR_BASE_ADDRESS.  */
-  if (TREE_CODE (base) == MEM_REF)
-    {
-      /* Note all this only works if DR_BASE_ADDRESS is the same as
-        MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
-        in other offsets.  We need to rework DR to compute the alingment
-        of DR_BASE_ADDRESS as long as all information is still available.  */
-      if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
-       {
-         base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
-         base_bitpos &= (base_alignment - 1);
-       }
-      else
-       base_bitpos = BITS_PER_UNIT;
-    }
-  if (base_bitpos != 0)
-    base_alignment = base_bitpos & -base_bitpos;
-  /* Also look at the alignment of the base address DR analysis
-     computed.  */
-  unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
-  if (base_addr_alignment > base_alignment)
-    base_alignment = base_addr_alignment;
-
   if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
     DR_VECT_AUX (dr)->base_element_aligned = true;
 
@@ -3560,100 +3530,35 @@ vect_analyze_data_refs (vec_info *vinfo,
         the outer-loop.  */
       if (loop && nested_in_vect_loop_p (loop, stmt))
        {
-         tree outer_step, outer_base, outer_init;
-         HOST_WIDE_INT pbitsize, pbitpos;
-         tree poffset;
-         machine_mode pmode;
-         int punsignedp, preversep, pvolatilep;
-         affine_iv base_iv, offset_iv;
-         tree dinit;
-
          /* Build a reference to the first location accessed by the
-            inner-loop: *(BASE+INIT).  (The first location is actually
-            BASE+INIT+OFFSET, but we add OFFSET separately later).  */
-          tree inner_base = build_fold_indirect_ref
-                                (fold_build_pointer_plus (base, init));
+            inner loop: *(BASE + INIT + OFFSET).  By construction,
+            this address must be invariant in the inner loop, so we
+            can consider it as being used in the outer loop.  */
+         tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
+                                         init, offset);
+         tree init_addr = fold_build_pointer_plus (base, init_offset);
+         tree init_ref = build_fold_indirect_ref (init_addr);
 
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
-                               "analyze in outer-loop: ");
-             dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
+                               "analyze in outer loop: ");
+             dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
              dump_printf (MSG_NOTE, "\n");
            }
 
-         outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
-                                           &poffset, &pmode, &punsignedp,
-                                           &preversep, &pvolatilep);
-         gcc_assert (outer_base != NULL_TREE);
-
-         if (pbitpos % BITS_PER_UNIT != 0)
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "failed: bit offset alignment.\n");
-             return false;
-           }
-
-         if (preversep)
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "failed: reverse storage order.\n");
-             return false;
-           }
-
-         outer_base = build_fold_addr_expr (outer_base);
-         if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
-                          &base_iv, false))
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "failed: evolution of base is not affine.\n");
-             return false;
-           }
-
-         if (offset)
-           {
-             if (poffset)
-               poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
-                                       poffset);
-             else
-               poffset = offset;
-           }
-
-         if (!poffset)
-           {
-             offset_iv.base = ssize_int (0);
-             offset_iv.step = ssize_int (0);
-           }
-         else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
-                               &offset_iv, false))
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "evolution of offset is not affine.\n");
-             return false;
-           }
+         data_reference init_dr;
+         if (!dr_analyze_innermost (&init_dr, init_ref, loop))
+           /* dr_analyze_innermost already explained the faiure.  */
+           return false;
 
-         outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
-         split_constant_offset (base_iv.base, &base_iv.base, &dinit);
-         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
-         split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
-         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
-
-         outer_step = size_binop (PLUS_EXPR,
-                               fold_convert (ssizetype, base_iv.step),
-                               fold_convert (ssizetype, offset_iv.step));
-
-         STMT_VINFO_DR_STEP (stmt_info) = outer_step;
-         /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
-         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
-         STMT_VINFO_DR_INIT (stmt_info) = outer_init;
-         STMT_VINFO_DR_OFFSET (stmt_info) =
-                               fold_convert (ssizetype, offset_iv.base);
-         STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
-                               size_int (highest_pow2_factor (offset_iv.base));
+         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = DR_BASE_ADDRESS (&init_dr);
+         STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info)
+           = DR_BASE_ALIGNMENT (&init_dr);
+         STMT_VINFO_DR_STEP (stmt_info) = DR_STEP (&init_dr);
+         STMT_VINFO_DR_INIT (stmt_info) = DR_INIT (&init_dr);
+         STMT_VINFO_DR_OFFSET (stmt_info) = DR_OFFSET (&init_dr);
+         STMT_VINFO_DR_ALIGNED_TO (stmt_info) = DR_ALIGNED_TO (&init_dr);
 
           if (dump_enabled_p ())
            {

Reply via email to