Hi Richard,
On Fri, 17 May 2019 at 18:47, Richard Sandiford <richard.sandif...@arm.com> wrote: > > Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> writes: > > [...] > >> > + { > >> > + struct mem_address parts = {NULL_TREE, integer_one_node, > >> > + NULL_TREE, NULL_TREE, NULL_TREE}; > >> > >> Might be better to use "= {}" and initialise the fields that matter by > >> assignment. As it stands this uses integer_one_node as the base, but I > >> couldn't tell if that was deliberate. > > > > I just copied this part from get_address_cost, similar to what is done > > there. > > Ah, sorry :-) > > > I have now changed the way you suggested but using the values > > used in get_address_cost. > > Thanks. > > > [...] > > @@ -3479,6 +3481,35 @@ add_iv_candidate_derived_from_uses (struct > > ivopts_data *data) > > data->iv_common_cands.truncate (0); > > } > > > > +/* Return the preferred mem scale factor for accessing MEM_MODE > > + of BASE in LOOP. */ > > +static unsigned int > > +preferred_mem_scale_factor (struct loop *loop, > > + tree base, machine_mode mem_mode) > > IMO this should live in tree-ssa-address.c instead. > > The only use of "loop" is to test for size vs. speed, but other callers > might want to make that decision based on individual blocks, so I think > it would make sense to pass a "speed" bool instead. Probably also worth > making it the last parameter, so that the order is consistent with > address_cost (though probably then inconsistent with something else :-)). > > > [...] > > @@ -3500,6 +3531,28 @@ add_iv_candidate_for_use (struct ivopts_data *data, > > struct iv_use *use) > > basetype = sizetype; > > record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); > > > > + /* Compare the cost of an address with an unscaled index with the cost of > > + an address with a scaled index and add candidate if useful. */ > > + if (use != NULL > > + && poly_int_tree_p (iv->step) > > + && tree_fits_poly_int64_p (iv->step) > > + && address_p (use->type)) > > + { > > + poly_int64 new_step; > > + poly_int64 poly_step = tree_to_poly_int64 (iv->step); > > This should be: > > poly_int64 step; > if (use != NULL > && poly_int_tree_p (iv->step, &step) > && address_p (use->type)) > { > poly_int64 new_step; > > > + unsigned int fact > > + = preferred_mem_scale_factor (data->current_loop, > > + use->iv->base, > > + TYPE_MODE (use->mem_type)); > > + > > + if ((fact != 1) > > + && multiple_p (poly_step, fact, &new_step)) > > Should be no brackets around "fact != 1". > > > [...] > > Looks really good to me otherwise, thanks. Bin, any comments? Revised patch which handles the above review comments is attached. Thanks, Kugan > Richard
From 6a146662fab39de876de332bacbb1a3300caefb8 Mon Sep 17 00:00:00 2001 From: Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> Date: Wed, 15 May 2019 09:16:43 +1000 Subject: [PATCH 1/2] Add support for IVOPT gcc/ChangeLog: 2019-05-15 Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> PR target/88834 * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES. (get_alias_ptr_type_for_ptr_address): Likewise. (add_iv_candidate_for_use): Add scaled index candidate if useful. * tree-ssa-address.c (preferred_mem_scale_factor): New. Change-Id: Ie47b1722dc4fb430f07dadb8a58385759e75df58 --- gcc/tree-ssa-address.c | 28 ++++++++++++++++++++++++++++ gcc/tree-ssa-address.h | 3 +++ gcc/tree-ssa-loop-ivopts.c | 26 +++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c index 1c17e93..fdb6619 100644 --- a/gcc/tree-ssa-address.c +++ b/gcc/tree-ssa-address.c @@ -1127,6 +1127,34 @@ maybe_fold_tmr (tree ref) return new_ref; } +/* Return the preferred mem scale factor for accessing MEM_MODE + of BASE which is optimized for SPEED. */ +unsigned int +preferred_mem_scale_factor (tree base, machine_mode mem_mode, + bool speed) +{ + struct mem_address parts = {}; + addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base)); + unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode); + + /* Addressing mode "base + index". */ + parts.index = integer_one_node; + parts.base = integer_one_node; + rtx addr = addr_for_mem_ref (&parts, as, false); + unsigned cost = address_cost (addr, mem_mode, as, speed); + + /* Addressing mode "base + index << scale". */ + parts.step = wide_int_to_tree (sizetype, fact); + addr = addr_for_mem_ref (&parts, as, false); + unsigned new_cost = address_cost (addr, mem_mode, as, speed); + + /* Compare the cost of an address with an unscaled index with + a scaled index and return factor if useful. */ + if (new_cost < cost) + return GET_MODE_UNIT_SIZE (mem_mode); + return 1; +} + /* Dump PARTS to FILE. */ extern void dump_mem_address (FILE *, struct mem_address *); diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h index 6fa4eae..9812f36 100644 --- a/gcc/tree-ssa-address.h +++ b/gcc/tree-ssa-address.h @@ -39,4 +39,7 @@ tree create_mem_ref (gimple_stmt_iterator *, tree, extern void copy_ref_info (tree, tree); tree maybe_fold_tmr (tree); +extern unsigned int preferred_mem_scale_factor (tree base, + machine_mode mem_mode, + bool speed); #endif /* GCC_TREE_SSA_ADDRESS_H */ diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 9864b59..e6462fe 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p) switch (gimple_call_internal_fn (call)) { case IFN_MASK_LOAD: + case IFN_MASK_LOAD_LANES: if (op_p == gimple_call_arg_ptr (call, 0)) return TREE_TYPE (gimple_call_lhs (call)); return NULL_TREE; case IFN_MASK_STORE: + case IFN_MASK_STORE_LANES: if (op_p == gimple_call_arg_ptr (call, 0)) return TREE_TYPE (gimple_call_arg (call, 3)); return NULL_TREE; @@ -3479,7 +3481,7 @@ add_iv_candidate_derived_from_uses (struct ivopts_data *data) data->iv_common_cands.truncate (0); } -/* Adds candidates based on the value of USE's iv. */ + /* Adds candidates based on the value of USE's iv. */ static void add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) @@ -3500,6 +3502,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) basetype = sizetype; record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); + /* Compare the cost of an address with an unscaled index with the cost of + an address with a scaled index and add candidate if useful. */ + poly_int64 step; + if (use != NULL + && poly_int_tree_p (iv->step, &step) + && address_p (use->type)) + { + poly_int64 new_step; + unsigned int fact = preferred_mem_scale_factor + (use->iv->base, + TYPE_MODE (use->mem_type), + optimize_loop_for_speed_p (data->current_loop)); + + if (fact != 1 + && multiple_p (step, fact, &new_step)) + add_candidate (data, size_int (0), + wide_int_to_tree (sizetype, new_step), + true, NULL); + } + /* Record common candidate with constant offset stripped in base. Like the use itself, we also add candidate directly for it. */ base = strip_offset (iv->base, &offset); @@ -7112,6 +7134,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use) { case IFN_MASK_LOAD: case IFN_MASK_STORE: + case IFN_MASK_LOAD_LANES: + case IFN_MASK_STORE_LANES: /* The second argument contains the correct alias type. */ gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0)); return TREE_TYPE (gimple_call_arg (call, 1)); -- 2.7.4