On Mon, Jul 3, 2017 at 9:38 AM, Richard Sandiford
<richard.sandif...@linaro.org> wrote:
> This patch records the base alignment and misalignment in
> innermost_loop_behavior, to avoid the second-guessing that was
> previously done in vect_compute_data_ref_alignment.  It also makes
> vect_analyze_data_refs use dr_analyze_innermost, instead of having an
> almost-copy of the same code.
>
> I wasn't sure whether the alignments should be measured in bits
> (for consistency with most other interfaces) or in bytes (for consistency
> with DR_ALIGNED_TO, now DR_OFFSET_ALIGNMENT, and with *_ptr_info_alignment).
> I went for bytes because:
>
> - I think in practice most consumers are going to want bytes.
>   E.g. using bytes avoids having to mix TYPE_ALIGN and TYPE_ALIGN_UNIT
>   in vect_compute_data_ref_alignment.
>
> - It means that any bit-level paranoia is dealt with when building
>   the innermost_loop_behavior and doesn't get pushed down to consumers.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Ok.

Thanks,
Richard.

> Richard
>
>
> 2017-07-03  Richard Sandiford  <richard.sandif...@linaro.org>
>
> gcc/
>         * tree-data-ref.h (innermost_loop_behavior): Add base_alignment
>         and base_misalignment fields.
>         (DR_BASE_ALIGNMENT, DR_BASE_MISALIGNMENT): New macros.
>         * tree-data-ref.c: Include builtins.h.
>         (dr_analyze_innermost): Set up the new innermost_loop_behavior fields.
>         * tree-vectorizer.h (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
>         (STMT_VINFO_DR_BASE_MISALIGNMENT): Likewise.
>         * tree-vect-data-refs.c: Include tree-cfg.h.
>         (vect_compute_data_ref_alignment): Use the new innermost_loop_behavior
>         fields instead of calculating an alignment here.
>         (vect_analyze_data_refs): Use dr_analyze_innermost.  Dump the new
>         innermost_loop_behavior fields.
>
> Index: gcc/tree-data-ref.h
> ===================================================================
> --- gcc/tree-data-ref.h 2017-07-03 07:52:14.194782203 +0100
> +++ gcc/tree-data-ref.h 2017-07-03 07:52:55.920272347 +0100
> @@ -52,6 +52,42 @@ struct innermost_loop_behavior
>    tree init;
>    tree step;
>
> +  /* BASE_ADDRESS is known to be misaligned by BASE_MISALIGNMENT bytes
> +     from an alignment boundary of BASE_ALIGNMENT bytes.  For example,
> +     if we had:
> +
> +       struct S __attribute__((aligned(16))) { ... };
> +
> +       char *ptr;
> +       ... *(struct S *) (ptr - 4) ...;
> +
> +     the information would be:
> +
> +       base_address:      ptr
> +       base_alignment:     16
> +       base_misalignment:   4
> +       init:               -4
> +
> +     where init cancels the base misalignment.  If instead we had a
> +     reference to a particular field:
> +
> +       struct S __attribute__((aligned(16))) { ... int f; ... };
> +
> +       char *ptr;
> +       ... ((struct S *) (ptr - 4))->f ...;
> +
> +     the information would be:
> +
> +       base_address:      ptr
> +       base_alignment:     16
> +       base_misalignment:   4
> +       init:               -4 + offsetof (S, f)
> +
> +     where base_address + init might also be misaligned, and by a different
> +     amount from base_address.  */
> +  unsigned int base_alignment;
> +  unsigned int base_misalignment;
> +
>    /* The largest power of two that divides OFFSET, capped to a suitably
>       high value if the offset is zero.  This is a byte rather than a bit
>       quantity.  */
> @@ -147,6 +183,8 @@ #define DR_OFFSET(DR)              (DR)-
>  #define DR_INIT(DR)                (DR)->innermost.init
>  #define DR_STEP(DR)                (DR)->innermost.step
>  #define DR_PTR_INFO(DR)            (DR)->alias.ptr_info
> +#define DR_BASE_ALIGNMENT(DR)      (DR)->innermost.base_alignment
> +#define DR_BASE_MISALIGNMENT(DR)   (DR)->innermost.base_misalignment
>  #define DR_OFFSET_ALIGNMENT(DR)    (DR)->innermost.offset_alignment
>  #define DR_STEP_ALIGNMENT(DR)      (DR)->innermost.step_alignment
>  #define DR_INNERMOST(DR)           (DR)->innermost
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c 2017-07-03 07:52:14.193782226 +0100
> +++ gcc/tree-data-ref.c 2017-07-03 07:52:55.920272347 +0100
> @@ -94,6 +94,7 @@ Software Foundation; either version 3, o
>  #include "dumpfile.h"
>  #include "tree-affine.h"
>  #include "params.h"
> +#include "builtins.h"
>
>  static struct datadep_stats
>  {
> @@ -802,11 +803,26 @@ dr_analyze_innermost (struct data_refere
>        return false;
>      }
>
> +  /* Calculate the alignment and misalignment for the inner reference.  */
> +  unsigned int HOST_WIDE_INT base_misalignment;
> +  unsigned int base_alignment;
> +  get_object_alignment_1 (base, &base_alignment, &base_misalignment);
> +
> +  /* There are no bitfield references remaining in BASE, so the values
> +     we got back must be whole bytes.  */
> +  gcc_assert (base_alignment % BITS_PER_UNIT == 0
> +             && base_misalignment % BITS_PER_UNIT == 0);
> +  base_alignment /= BITS_PER_UNIT;
> +  base_misalignment /= BITS_PER_UNIT;
> +
>    if (TREE_CODE (base) == MEM_REF)
>      {
>        if (!integer_zerop (TREE_OPERAND (base, 1)))
>         {
> +         /* Subtract MOFF from the base and add it to POFFSET instead.
> +            Adjust the misalignment to reflect the amount we subtracted.  */
>           offset_int moff = mem_ref_offset (base);
> +         base_misalignment -= moff.to_short_addr ();
>           tree mofft = wide_int_to_tree (sizetype, moff);
>           if (!poffset)
>             poffset = mofft;
> @@ -855,20 +871,46 @@ dr_analyze_innermost (struct data_refere
>      }
>
>    init = ssize_int (pbitpos / BITS_PER_UNIT);
> +
> +  /* Subtract any constant component from the base and add it to INIT instead.
> +     Adjust the misalignment to reflect the amount we subtracted.  */
>    split_constant_offset (base_iv.base, &base_iv.base, &dinit);
> -  init =  size_binop (PLUS_EXPR, init, dinit);
> +  init = size_binop (PLUS_EXPR, init, dinit);
> +  base_misalignment -= TREE_INT_CST_LOW (dinit);
> +
>    split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
> -  init =  size_binop (PLUS_EXPR, init, dinit);
> +  init = size_binop (PLUS_EXPR, init, dinit);
>
>    step = size_binop (PLUS_EXPR,
>                      fold_convert (ssizetype, base_iv.step),
>                      fold_convert (ssizetype, offset_iv.step));
>
> -  drb->base_address = canonicalize_base_object_address (base_iv.base);
> +  base = canonicalize_base_object_address (base_iv.base);
> +
> +  /* See if get_pointer_alignment can guarantee a higher alignment than
> +     the one we calculated above.  */
> +  unsigned int HOST_WIDE_INT alt_misalignment;
> +  unsigned int alt_alignment;
> +  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
> +
> +  /* As above, these values must be whole bytes.  */
> +  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
> +             && alt_misalignment % BITS_PER_UNIT == 0);
> +  alt_alignment /= BITS_PER_UNIT;
> +  alt_misalignment /= BITS_PER_UNIT;
> +
> +  if (base_alignment < alt_alignment)
> +    {
> +      base_alignment = alt_alignment;
> +      base_misalignment = alt_misalignment;
> +    }
>
> +  drb->base_address = base;
>    drb->offset = fold_convert (ssizetype, offset_iv.base);
>    drb->init = init;
>    drb->step = step;
> +  drb->base_alignment = base_alignment;
> +  drb->base_misalignment = base_misalignment & (base_alignment - 1);
>    drb->offset_alignment = highest_pow2_factor (offset_iv.base);
>    drb->step_alignment = highest_pow2_factor (step);
>
> @@ -1084,6 +1126,9 @@ create_data_ref (loop_p nest, loop_p loo
>        print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
>        fprintf (dump_file, "\n\tstep: ");
>        print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
> +      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
> +      fprintf (dump_file, "\n\tbase misalignment: %d",
> +              DR_BASE_MISALIGNMENT (dr));
>        fprintf (dump_file, "\n\toffset alignment: %d",
>                DR_OFFSET_ALIGNMENT (dr));
>        fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h       2017-07-03 07:52:14.196782157 +0100
> +++ gcc/tree-vectorizer.h       2017-07-03 07:52:55.921272300 +0100
> @@ -707,6 +707,9 @@ #define STMT_VINFO_DR_BASE_ADDRESS(S)
>  #define STMT_VINFO_DR_INIT(S)              (S)->dr_wrt_vec_loop.init
>  #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_wrt_vec_loop.offset
>  #define STMT_VINFO_DR_STEP(S)              (S)->dr_wrt_vec_loop.step
> +#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_wrt_vec_loop.base_alignment
> +#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
> +  (S)->dr_wrt_vec_loop.base_misalignment
>  #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
>    (S)->dr_wrt_vec_loop.offset_alignment
>  #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c   2017-07-03 07:52:14.194782203 +0100
> +++ gcc/tree-vect-data-refs.c   2017-07-03 07:52:55.921272300 +0100
> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o
>  #include "expr.h"
>  #include "builtins.h"
>  #include "params.h"
> +#include "tree-cfg.h"
>
>  /* Return true if load- or store-lanes optab OPTAB is implemented for
>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
> @@ -667,8 +668,6 @@ vect_compute_data_ref_alignment (struct
>    struct loop *loop = NULL;
>    tree ref = DR_REF (dr);
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> -  tree base;
> -  unsigned HOST_WIDE_INT alignment;
>
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location,
> @@ -728,48 +727,18 @@ vect_compute_data_ref_alignment (struct
>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>                          "step doesn't divide the vector-size.\n");
>      }
> -  tree base_addr = drb->base_address;
>
> -  /* To look at alignment of the base we have to preserve an inner MEM_REF
> -     as that carries alignment information of the actual access.  */
> -  base = ref;
> -  while (handled_component_p (base))
> -    base = TREE_OPERAND (base, 0);
> -  unsigned int base_alignment = 0;
> -  unsigned HOST_WIDE_INT base_bitpos;
> -  get_object_alignment_1 (base, &base_alignment, &base_bitpos);
> -  /* As data-ref analysis strips the MEM_REF down to its base operand
> -     to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
> -     adjust things to make base_alignment valid as the alignment of
> -     DR_BASE_ADDRESS.  */
> -  if (TREE_CODE (base) == MEM_REF)
> -    {
> -      /* Note all this only works if DR_BASE_ADDRESS is the same as
> -        MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
> -        in other offsets.  We need to rework DR to compute the alingment
> -        of DR_BASE_ADDRESS as long as all information is still available.  */
> -      if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
> -       {
> -         base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
> -         base_bitpos &= (base_alignment - 1);
> -       }
> -      else
> -       base_bitpos = BITS_PER_UNIT;
> -    }
> -  if (base_bitpos != 0)
> -    base_alignment = base_bitpos & -base_bitpos;
> -  /* Also look at the alignment of the base address DR analysis
> -     computed.  */
> -  unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
> -  if (base_addr_alignment > base_alignment)
> -    base_alignment = base_addr_alignment;
> +  unsigned int base_alignment = drb->base_alignment;
> +  unsigned int base_misalignment = drb->base_misalignment;
> +  unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype);
> +  unsigned HOST_WIDE_INT element_alignment
> +    = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
>
> -  if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
> +  if (base_alignment >= element_alignment
> +      && (base_misalignment & (element_alignment - 1)) == 0)
>      DR_VECT_AUX (dr)->base_element_aligned = true;
>
> -  alignment = TYPE_ALIGN_UNIT (vectype);
> -
> -  if (drb->offset_alignment < alignment
> +  if (drb->offset_alignment < vector_alignment
>        || !step_preserves_misalignment_p
>        /* We need to know whether the step wrt the vectorized loop is
>          negative when computing the starting misalignment below.  */
> @@ -785,12 +754,13 @@ vect_compute_data_ref_alignment (struct
>        return true;
>      }
>
> -  if (base_alignment < TYPE_ALIGN (vectype))
> +  if (base_alignment < vector_alignment)
>      {
> -      base = base_addr;
> +      tree base = drb->base_address;
>        if (TREE_CODE (base) == ADDR_EXPR)
>         base = TREE_OPERAND (base, 0);
> -      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
> +      if (!vect_can_force_dr_alignment_p (base,
> +                                         vector_alignment * BITS_PER_UNIT))
>         {
>           if (dump_enabled_p ())
>             {
> @@ -828,24 +798,20 @@ vect_compute_data_ref_alignment (struct
>        DR_VECT_AUX (dr)->base_decl = base;
>        DR_VECT_AUX (dr)->base_misaligned = true;
>        DR_VECT_AUX (dr)->base_element_aligned = true;
> +      base_misalignment = 0;
>      }
> +  unsigned int misalignment = (base_misalignment
> +                              + TREE_INT_CST_LOW (drb->init));
>
>    /* If this is a backward running DR then first access in the larger
>       vectype actually is N-1 elements before the address in the DR.
>       Adjust misalign accordingly.  */
> -  tree misalign = drb->init;
>    if (tree_int_cst_sgn (drb->step) < 0)
> -    {
> -      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
> -      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
> -        otherwise we wouldn't be here.  */
> -      offset = fold_build2 (MULT_EXPR, ssizetype, offset, drb->step);
> -      /* PLUS because STEP was negative.  */
> -      misalign = size_binop (PLUS_EXPR, misalign, offset);
> -    }
> +    /* PLUS because STEP is negative.  */
> +    misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
> +                    * TREE_INT_CST_LOW (drb->step));
>
> -  SET_DR_MISALIGNMENT (dr,
> -                      wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
> +  SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1));
>
>    if (dump_enabled_p ())
>      {
> @@ -3554,100 +3520,27 @@ vect_analyze_data_refs (vec_info *vinfo,
>          the outer-loop.  */
>        if (loop && nested_in_vect_loop_p (loop, stmt))
>         {
> -         tree outer_step, outer_base, outer_init;
> -         HOST_WIDE_INT pbitsize, pbitpos;
> -         tree poffset;
> -         machine_mode pmode;
> -         int punsignedp, preversep, pvolatilep;
> -         affine_iv base_iv, offset_iv;
> -         tree dinit;
> -
>           /* Build a reference to the first location accessed by the
> -            inner-loop: *(BASE+INIT).  (The first location is actually
> -            BASE+INIT+OFFSET, but we add OFFSET separately later).  */
> -          tree inner_base = build_fold_indirect_ref
> -                                (fold_build_pointer_plus (base, init));
> +            inner loop: *(BASE + INIT + OFFSET).  By construction,
> +            this address must be invariant in the inner loop, so we
> +            can consider it as being used in the outer loop.  */
> +         tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
> +                                         init, offset);
> +         tree init_addr = fold_build_pointer_plus (base, init_offset);
> +         tree init_ref = build_fold_indirect_ref (init_addr);
>
>           if (dump_enabled_p ())
>             {
>               dump_printf_loc (MSG_NOTE, vect_location,
> -                               "analyze in outer-loop: ");
> -             dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
> +                               "analyze in outer loop: ");
> +             dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
>               dump_printf (MSG_NOTE, "\n");
>             }
>
> -         outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
> -                                           &poffset, &pmode, &punsignedp,
> -                                           &preversep, &pvolatilep);
> -         gcc_assert (outer_base != NULL_TREE);
> -
> -         if (pbitpos % BITS_PER_UNIT != 0)
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                 "failed: bit offset alignment.\n");
> -             return false;
> -           }
> -
> -         if (preversep)
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                "failed: reverse storage order.\n");
> -             return false;
> -           }
> -
> -         outer_base = build_fold_addr_expr (outer_base);
> -         if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
> -                          &base_iv, false))
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                 "failed: evolution of base is not affine.\n");
> -             return false;
> -           }
> -
> -         if (offset)
> -           {
> -             if (poffset)
> -               poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
> -                                       poffset);
> -             else
> -               poffset = offset;
> -           }
> -
> -         if (!poffset)
> -           {
> -             offset_iv.base = ssize_int (0);
> -             offset_iv.step = ssize_int (0);
> -           }
> -         else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
> -                               &offset_iv, false))
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                 "evolution of offset is not affine.\n");
> -             return false;
> -           }
> -
> -         outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
> -         split_constant_offset (base_iv.base, &base_iv.base, &dinit);
> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
> -         split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
> -
> -         outer_step = size_binop (PLUS_EXPR,
> -                               fold_convert (ssizetype, base_iv.step),
> -                               fold_convert (ssizetype, offset_iv.step));
> -
> -         STMT_VINFO_DR_STEP (stmt_info) = outer_step;
> -         /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
> -         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
> -         STMT_VINFO_DR_INIT (stmt_info) = outer_init;
> -         STMT_VINFO_DR_OFFSET (stmt_info) =
> -                               fold_convert (ssizetype, offset_iv.base);
> -         STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info)
> -           = highest_pow2_factor (offset_iv.base);
> +         if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
> +                                    init_ref, loop))
> +           /* dr_analyze_innermost already explained the failure.  */
> +           return false;
>
>            if (dump_enabled_p ())
>             {
> @@ -3665,6 +3558,10 @@ vect_analyze_data_refs (vec_info *vinfo,
>               dump_printf (MSG_NOTE, "\n\touter step: ");
>               dump_generic_expr (MSG_NOTE, TDF_SLIM,
>                                   STMT_VINFO_DR_STEP (stmt_info));
> +             dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
> +                          STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
> +             dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
> +                          STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
>               dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
>                            STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
>               dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",

Reply via email to