Hi Richard,

On Fri, 17 May 2019 at 18:47, Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> writes:
> > [...]
> >> > +    {
> >> > +      struct mem_address parts = {NULL_TREE, integer_one_node,
> >> > +                               NULL_TREE, NULL_TREE, NULL_TREE};
> >>
> >> Might be better to use "= {}" and initialise the fields that matter by
> >> assignment.  As it stands this uses integer_one_node as the base, but I
> >> couldn't tell if that was deliberate.
> >
> > I just copied this part from get_address_cost, similar to what is done
> > there.
>
> Ah, sorry :-)
>
> > I have now changed the way you suggested but using the values
> > used in get_address_cost.
>
> Thanks.
>
> > [...]
> > @@ -3479,6 +3481,35 @@ add_iv_candidate_derived_from_uses (struct 
> > ivopts_data *data)
> >    data->iv_common_cands.truncate (0);
> >  }
> >
> > +/* Return the preferred mem scale factor for accessing MEM_MODE
> > +   of BASE in LOOP.  */
> > +static unsigned int
> > +preferred_mem_scale_factor (struct loop *loop,
> > +                         tree base, machine_mode mem_mode)
>
> IMO this should live in tree-ssa-address.c instead.
>
> The only use of "loop" is to test for size vs. speed, but other callers
> might want to make that decision based on individual blocks, so I think
> it would make sense to pass a "speed" bool instead.  Probably also worth
> making it the last parameter, so that the order is consistent with
> address_cost (though probably then inconsistent with something else :-)).
>
> > [...]
> > @@ -3500,6 +3531,28 @@ add_iv_candidate_for_use (struct ivopts_data *data, 
> > struct iv_use *use)
> >      basetype = sizetype;
> >    record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
> >
> > +  /* Compare the cost of an address with an unscaled index with the cost of
> > +    an address with a scaled index and add candidate if useful. */
> > +  if (use != NULL
> > +      && poly_int_tree_p (iv->step)
> > +      && tree_fits_poly_int64_p (iv->step)
> > +      && address_p (use->type))
> > +    {
> > +      poly_int64 new_step;
> > +      poly_int64 poly_step = tree_to_poly_int64 (iv->step);
>
> This should be:
>
>   poly_int64 step;
>   if (use != NULL
>       && poly_int_tree_p (iv->step, &step)
>       && address_p (use->type))
>     {
>       poly_int64 new_step;
>
> > +      unsigned int fact
> > +     = preferred_mem_scale_factor (data->current_loop,
> > +                                    use->iv->base,
> > +                                    TYPE_MODE (use->mem_type));
> > +
> > +      if ((fact != 1)
> > +       && multiple_p (poly_step, fact, &new_step))
>
> Should be no brackets around "fact != 1".
>
> > [...]
>
> Looks really good to me otherwise, thanks.  Bin, any comments?
Revised patch which handles the above review comments is attached.

Thanks,
Kugan

> Richard
From 6a146662fab39de876de332bacbb1a3300caefb8 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org>
Date: Wed, 15 May 2019 09:16:43 +1000
Subject: [PATCH 1/2] Add support for IVOPT

gcc/ChangeLog:

2019-05-15  Kugan Vivekanandarajah  <kugan.vivekanandara...@linaro.org>

	PR target/88834
	* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
	IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
	(get_alias_ptr_type_for_ptr_address): Likewise.
	(add_iv_candidate_for_use): Add scaled index candidate if useful.
	* tree-ssa-address.c (preferred_mem_scale_factor): New.

Change-Id: Ie47b1722dc4fb430f07dadb8a58385759e75df58
---
 gcc/tree-ssa-address.c     | 28 ++++++++++++++++++++++++++++
 gcc/tree-ssa-address.h     |  3 +++
 gcc/tree-ssa-loop-ivopts.c | 26 +++++++++++++++++++++++++-
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
index 1c17e93..fdb6619 100644
--- a/gcc/tree-ssa-address.c
+++ b/gcc/tree-ssa-address.c
@@ -1127,6 +1127,34 @@ maybe_fold_tmr (tree ref)
   return new_ref;
 }
 
+/* Return the preferred mem scale factor for accessing MEM_MODE
+   of BASE which is optimized for SPEED.  */
+unsigned int
+preferred_mem_scale_factor (tree base, machine_mode mem_mode,
+			    bool speed)
+{
+  struct mem_address parts = {};
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
+  unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode);
+
+  /* Addressing mode "base + index".  */
+  parts.index = integer_one_node;
+  parts.base = integer_one_node;
+  rtx addr = addr_for_mem_ref (&parts, as, false);
+  unsigned cost = address_cost (addr, mem_mode, as, speed);
+
+  /* Addressing mode "base + index << scale".  */
+  parts.step = wide_int_to_tree (sizetype, fact);
+  addr = addr_for_mem_ref (&parts, as, false);
+  unsigned new_cost = address_cost (addr, mem_mode, as, speed);
+
+  /* Compare the cost of an address with an unscaled index with
+     a scaled index and return factor if useful. */
+  if (new_cost < cost)
+    return GET_MODE_UNIT_SIZE (mem_mode);
+  return 1;
+}
+
 /* Dump PARTS to FILE.  */
 
 extern void dump_mem_address (FILE *, struct mem_address *);
diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h
index 6fa4eae..9812f36 100644
--- a/gcc/tree-ssa-address.h
+++ b/gcc/tree-ssa-address.h
@@ -39,4 +39,7 @@ tree create_mem_ref (gimple_stmt_iterator *, tree,
 extern void copy_ref_info (tree, tree);
 tree maybe_fold_tmr (tree);
 
+extern unsigned int preferred_mem_scale_factor (tree base,
+						machine_mode mem_mode,
+						bool speed);
 #endif /* GCC_TREE_SSA_ADDRESS_H */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 9864b59..e6462fe 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
   switch (gimple_call_internal_fn (call))
     {
     case IFN_MASK_LOAD:
+    case IFN_MASK_LOAD_LANES:
       if (op_p == gimple_call_arg_ptr (call, 0))
 	return TREE_TYPE (gimple_call_lhs (call));
       return NULL_TREE;
 
     case IFN_MASK_STORE:
+    case IFN_MASK_STORE_LANES:
       if (op_p == gimple_call_arg_ptr (call, 0))
 	return TREE_TYPE (gimple_call_arg (call, 3));
       return NULL_TREE;
@@ -3479,7 +3481,7 @@ add_iv_candidate_derived_from_uses (struct ivopts_data *data)
   data->iv_common_cands.truncate (0);
 }
 
-/* Adds candidates based on the value of USE's iv.  */
+  /* Adds candidates based on the value of USE's iv.  */
 
 static void
 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
@@ -3500,6 +3502,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
     basetype = sizetype;
   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
 
+  /* Compare the cost of an address with an unscaled index with the cost of
+    an address with a scaled index and add candidate if useful. */
+  poly_int64 step;
+  if (use != NULL
+      && poly_int_tree_p (iv->step, &step)
+      && address_p (use->type))
+    {
+      poly_int64 new_step;
+      unsigned int fact = preferred_mem_scale_factor
+	(use->iv->base,
+	 TYPE_MODE (use->mem_type),
+	 optimize_loop_for_speed_p (data->current_loop));
+
+      if (fact != 1
+	  && multiple_p (step, fact, &new_step))
+	add_candidate (data, size_int (0),
+		       wide_int_to_tree (sizetype, new_step),
+		       true, NULL);
+    }
+
   /* Record common candidate with constant offset stripped in base.
      Like the use itself, we also add candidate directly for it.  */
   base = strip_offset (iv->base, &offset);
@@ -7112,6 +7134,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
     {
     case IFN_MASK_LOAD:
     case IFN_MASK_STORE:
+    case IFN_MASK_LOAD_LANES:
+    case IFN_MASK_STORE_LANES:
       /* The second argument contains the correct alias type.  */
       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
       return TREE_TYPE (gimple_call_arg (call, 1));
-- 
2.7.4

Reply via email to