This patch records the base alignment in data_reference, to avoid the second-guessing that was previously done in vect_compute_data_ref_alignment. It also makes vect_analyze_data_refs use dr_analyze_innermost, instead of having an almost-copy of the same code.
I'd originally tried to do the second part as a standalone patch, but on its own it caused us to miscompute the base alignment (due to the second-guessing not quite working). This was previously latent because the old code set STMT_VINFO_DR_ALIGNED_TO to a byte value, whereas it should have been bits. After the previous patch, the only thing that dr_analyze_innermost read from the dr was the DR_REF. I thought it would be better to pass that in and make data_reference write-only. This means that callers like vect_analyze_data_refs don't have to set any fields first (and thus not worry about *which* fields they should set). Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Richard 2017-06-28 Richard Sandiford <richard.sandif...@linaro.org> gcc/ * tree-data-ref.h (data_reference): Add a base_alignment field. (DR_BASE_ALIGNMENT): New macro. (dr_analyze_innermost): Add a tree argument. * tree-data-ref.c: Include builtins.h. (dr_analyze_innermost): Take the tree reference as argument. Set up DR_BASE_ALIGNMENT. (create_data_ref): Update call accordingly. * tree-predcom.c (find_looparound_phi): Likewise. * tree-vectorizer.h (_stmt_vec_info): Add dr_base_alignment. (STMT_VINFO_DR_BASE_ALIGNMENT): New macro. * tree-vect-data-refs.c: Include tree-cfg.h. (vect_compute_data_ref_alignment): Use DR_BASE_ALIGNMENT instead of calculating an alignment here. (vect_analyze_data_refs): Use dr_analyze_innermost. Record the base alignment in STMT_VINFO_DR_BASE_ALIGNMENT. Index: gcc/tree-data-ref.h =================================================================== --- gcc/tree-data-ref.h 2017-06-26 19:41:19.549571836 +0100 +++ gcc/tree-data-ref.h 2017-06-28 14:26:19.651051322 +0100 @@ -119,6 +119,10 @@ struct data_reference /* True when the data reference is in RHS of a stmt. */ bool is_read; + /* The alignment of INNERMOST.base_address, in bits. This is logically + part of INNERMOST, but is kept here to avoid unnecessary padding. 
*/ + unsigned int base_alignment; + /* Behavior of the memory reference in the innermost loop. */ struct innermost_loop_behavior innermost; @@ -139,6 +143,7 @@ #define DR_NUM_DIMENSIONS(DR) DR_AC #define DR_IS_READ(DR) (DR)->is_read #define DR_IS_WRITE(DR) (!DR_IS_READ (DR)) #define DR_BASE_ADDRESS(DR) (DR)->innermost.base_address +#define DR_BASE_ALIGNMENT(DR) (DR)->base_alignment #define DR_OFFSET(DR) (DR)->innermost.offset #define DR_INIT(DR) (DR)->innermost.init #define DR_STEP(DR) (DR)->innermost.step @@ -322,7 +327,7 @@ #define DDR_DIST_VECT(DDR, I) \ #define DDR_REVERSED_P(DDR) (DDR)->reversed_p -bool dr_analyze_innermost (struct data_reference *, struct loop *); +bool dr_analyze_innermost (struct data_reference *, tree, struct loop *); extern bool compute_data_dependences_for_loop (struct loop *, bool, vec<loop_p> *, vec<data_reference_p> *, Index: gcc/tree-data-ref.c =================================================================== --- gcc/tree-data-ref.c 2017-06-28 14:26:12.946306736 +0100 +++ gcc/tree-data-ref.c 2017-06-28 14:26:19.651051322 +0100 @@ -94,6 +94,7 @@ Software Foundation; either version 3, o #include "dumpfile.h" #include "tree-affine.h" #include "params.h" +#include "builtins.h" static struct datadep_stats { @@ -749,7 +750,7 @@ canonicalize_base_object_address (tree a return build_fold_addr_expr (TREE_OPERAND (addr, 0)); } -/* Analyze the behavior of memory reference DR. There are two modes: +/* Analyze the behavior of memory reference REF. There are two modes: - BB analysis. In this case we simply split the address into base, init and offset components, without reference to any containing loop. @@ -767,12 +768,13 @@ canonicalize_base_object_address (tree a dummy outermost loop. In other cases perform loop analysis. Return true if the analysis succeeded and store the results in DR if so. - BB analysis can only fail for bitfield or reversed-storage accesses. */ + BB analysis can only fail for bitfield or reversed-storage accesses. 
+ + Note that DR is purely an output; the contents on input don't matter. */ bool -dr_analyze_innermost (struct data_reference *dr, struct loop *loop) +dr_analyze_innermost (struct data_reference *dr, tree ref, struct loop *loop) { - tree ref = DR_REF (dr); HOST_WIDE_INT pbitsize, pbitpos; tree base, poffset; machine_mode pmode; @@ -802,6 +804,7 @@ dr_analyze_innermost (struct data_refere return false; } + unsigned int base_alignment = get_object_alignment (base); if (TREE_CODE (base) == MEM_REF) { if (!integer_zerop (TREE_OPERAND (base, 1))) @@ -858,16 +861,31 @@ dr_analyze_innermost (struct data_refere init = ssize_int (pbitpos / BITS_PER_UNIT); split_constant_offset (base_iv.base, &base_iv.base, &dinit); - init = size_binop (PLUS_EXPR, init, dinit); + + /* If the stripped offset has a lower alignment than the original base, + we need to adjust the alignment of the new base down to match. */ + unsigned int dinit_bits_low = ((unsigned int) TREE_INT_CST_LOW (dinit) + * BITS_PER_UNIT); + if (dinit_bits_low != 0) + base_alignment = MIN (base_alignment, dinit_bits_low & -dinit_bits_low); + init = size_binop (PLUS_EXPR, init, dinit); + split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); - init = size_binop (PLUS_EXPR, init, dinit); + init = size_binop (PLUS_EXPR, init, dinit); step = size_binop (PLUS_EXPR, fold_convert (ssizetype, base_iv.step), fold_convert (ssizetype, offset_iv.step)); - DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base); + base = canonicalize_base_object_address (base_iv.base); + + /* See if get_pointer_alignment can guarantee a higher alignment than + the one we calculated above. 
*/ + unsigned int alt_alignment = get_pointer_alignment (base); + base_alignment = MAX (base_alignment, alt_alignment); + DR_BASE_ADDRESS (dr) = base; + DR_BASE_ALIGNMENT (dr) = base_alignment; DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base); DR_INIT (dr) = init; DR_STEP (dr) = step; @@ -1071,7 +1089,7 @@ create_data_ref (loop_p nest, loop_p loo DR_REF (dr) = memref; DR_IS_READ (dr) = is_read; - dr_analyze_innermost (dr, nest != NULL ? loop : NULL); + dr_analyze_innermost (dr, memref, nest != NULL ? loop : NULL); dr_analyze_indices (dr, nest, loop); dr_analyze_alias (dr); Index: gcc/tree-predcom.c =================================================================== --- gcc/tree-predcom.c 2017-05-18 07:51:11.896799736 +0100 +++ gcc/tree-predcom.c 2017-06-28 14:26:19.651051322 +0100 @@ -1149,7 +1149,7 @@ find_looparound_phi (struct loop *loop, memset (&init_dr, 0, sizeof (struct data_reference)); DR_REF (&init_dr) = init_ref; DR_STMT (&init_dr) = phi; - if (!dr_analyze_innermost (&init_dr, loop)) + if (!dr_analyze_innermost (&init_dr, init_ref, loop)) return NULL; if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2017-06-26 19:41:19.549571836 +0100 +++ gcc/tree-vectorizer.h 2017-06-28 14:26:19.653051245 +0100 @@ -553,7 +553,8 @@ typedef struct _stmt_vec_info { struct data_reference *data_ref_info; /* Information about the data-ref relative to this loop - nest (the loop that is being considered for vectorization). */ + nest (the loop that is being considered for vectorization). + See also dr_base_alignment. */ tree dr_base_address; tree dr_init; tree dr_offset; @@ -652,6 +653,10 @@ typedef struct _stmt_vec_info { /* The number of scalar stmt references from active SLP instances. */ unsigned int num_slp_uses; + + /* Logically belongs with dr_base_address etc., but is kept here to + avoid unnecessary padding. 
*/ + unsigned int dr_base_alignment; } *stmt_vec_info; /* Information about a gather/scatter call. */ @@ -711,6 +716,7 @@ #define STMT_VINFO_DR_INIT(S) #define STMT_VINFO_DR_OFFSET(S) (S)->dr_offset #define STMT_VINFO_DR_STEP(S) (S)->dr_step #define STMT_VINFO_DR_ALIGNED_TO(S) (S)->dr_aligned_to +#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_base_alignment #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c 2017-06-28 14:25:58.811888377 +0100 +++ gcc/tree-vect-data-refs.c 2017-06-28 14:26:19.652051284 +0100 @@ -50,6 +50,7 @@ Software Foundation; either version 3, o #include "expr.h" #include "builtins.h" #include "params.h" +#include "tree-cfg.h" /* Return true if load- or store-lanes optab OPTAB is implemented for COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ @@ -672,6 +673,7 @@ vect_compute_data_ref_alignment (struct tree aligned_to; tree step; unsigned HOST_WIDE_INT alignment; + unsigned int base_alignment; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -687,6 +689,7 @@ vect_compute_data_ref_alignment (struct misalign = DR_INIT (dr); aligned_to = DR_ALIGNED_TO (dr); base_addr = DR_BASE_ADDRESS (dr); + base_alignment = DR_BASE_ALIGNMENT (dr); vectype = STMT_VINFO_VECTYPE (stmt_info); /* In case the dataref is in an inner-loop of the loop that is being @@ -708,6 +711,7 @@ vect_compute_data_ref_alignment (struct misalign = STMT_VINFO_DR_INIT (stmt_info); aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info); base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info); + base_alignment = STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info); } else { @@ -738,40 +742,6 @@ vect_compute_data_ref_alignment (struct } } - /* To look at alignment of the base we have to preserve an inner MEM_REF - as that carries alignment information of the actual access. 
*/ - base = ref; - while (handled_component_p (base)) - base = TREE_OPERAND (base, 0); - unsigned int base_alignment = 0; - unsigned HOST_WIDE_INT base_bitpos; - get_object_alignment_1 (base, &base_alignment, &base_bitpos); - /* As data-ref analysis strips the MEM_REF down to its base operand - to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to - adjust things to make base_alignment valid as the alignment of - DR_BASE_ADDRESS. */ - if (TREE_CODE (base) == MEM_REF) - { - /* Note all this only works if DR_BASE_ADDRESS is the same as - MEM_REF operand zero, otherwise DR/SCEV analysis might have factored - in other offsets. We need to rework DR to compute the alingment - of DR_BASE_ADDRESS as long as all information is still available. */ - if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0)) - { - base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT; - base_bitpos &= (base_alignment - 1); - } - else - base_bitpos = BITS_PER_UNIT; - } - if (base_bitpos != 0) - base_alignment = base_bitpos & -base_bitpos; - /* Also look at the alignment of the base address DR analysis - computed. */ - unsigned int base_addr_alignment = get_pointer_alignment (base_addr); - if (base_addr_alignment > base_alignment) - base_alignment = base_addr_alignment; - if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype))) DR_VECT_AUX (dr)->base_element_aligned = true; @@ -3560,100 +3530,35 @@ vect_analyze_data_refs (vec_info *vinfo, the outer-loop. */ if (loop && nested_in_vect_loop_p (loop, stmt)) { - tree outer_step, outer_base, outer_init; - HOST_WIDE_INT pbitsize, pbitpos; - tree poffset; - machine_mode pmode; - int punsignedp, preversep, pvolatilep; - affine_iv base_iv, offset_iv; - tree dinit; - /* Build a reference to the first location accessed by the - inner-loop: *(BASE+INIT). (The first location is actually - BASE+INIT+OFFSET, but we add OFFSET separately later). 
*/ - tree inner_base = build_fold_indirect_ref - (fold_build_pointer_plus (base, init)); + inner loop: *(BASE + INIT + OFFSET). By construction, + this address must be invariant in the inner loop, so we + can consider it as being used in the outer loop. */ + tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), + init, offset); + tree init_addr = fold_build_pointer_plus (base, init_offset); + tree init_ref = build_fold_indirect_ref (init_addr); if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, - "analyze in outer-loop: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base); + "analyze in outer loop: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref); dump_printf (MSG_NOTE, "\n"); } - outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos, - &poffset, &pmode, &punsignedp, - &preversep, &pvolatilep); - gcc_assert (outer_base != NULL_TREE); - - if (pbitpos % BITS_PER_UNIT != 0) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "failed: bit offset alignment.\n"); - return false; - } - - if (preversep) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "failed: reverse storage order.\n"); - return false; - } - - outer_base = build_fold_addr_expr (outer_base); - if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base, - &base_iv, false)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "failed: evolution of base is not affine.\n"); - return false; - } - - if (offset) - { - if (poffset) - poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, - poffset); - else - poffset = offset; - } - - if (!poffset) - { - offset_iv.base = ssize_int (0); - offset_iv.step = ssize_int (0); - } - else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset, - &offset_iv, false)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "evolution of offset is not affine.\n"); - 
return false; - } + data_reference init_dr; + if (!dr_analyze_innermost (&init_dr, init_ref, loop)) + /* dr_analyze_innermost already explained the failure. */ + return false; - outer_init = ssize_int (pbitpos / BITS_PER_UNIT); - split_constant_offset (base_iv.base, &base_iv.base, &dinit); - outer_init = size_binop (PLUS_EXPR, outer_init, dinit); - split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); - outer_init = size_binop (PLUS_EXPR, outer_init, dinit); - - outer_step = size_binop (PLUS_EXPR, - fold_convert (ssizetype, base_iv.step), - fold_convert (ssizetype, offset_iv.step)); - - STMT_VINFO_DR_STEP (stmt_info) = outer_step; - /* FIXME: Use canonicalize_base_object_address (base_iv.base); */ - STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base; - STMT_VINFO_DR_INIT (stmt_info) = outer_init; - STMT_VINFO_DR_OFFSET (stmt_info) = - fold_convert (ssizetype, offset_iv.base); - STMT_VINFO_DR_ALIGNED_TO (stmt_info) = - size_int (highest_pow2_factor (offset_iv.base)); + STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = DR_BASE_ADDRESS (&init_dr); + STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info) + = DR_BASE_ALIGNMENT (&init_dr); + STMT_VINFO_DR_STEP (stmt_info) = DR_STEP (&init_dr); + STMT_VINFO_DR_INIT (stmt_info) = DR_INIT (&init_dr); + STMT_VINFO_DR_OFFSET (stmt_info) = DR_OFFSET (&init_dr); + STMT_VINFO_DR_ALIGNED_TO (stmt_info) = DR_ALIGNED_TO (&init_dr); if (dump_enabled_p ()) {