Hi,

This patch adjusts vect_gather_scatter_fn_p to always check an offset
type with swapped signedness (vs. the original offset argument).
If the target supports the gather/scatter with the new offset type the
offset is converted to it before emitting the gather/scatter.  In the
same way the costs for the conversion are added.

This was split off and changed from my previous patch that handled both scale and offset signedness at once.

Bootstrapped on x86 and power10.  Regtested on rv64gcv_zvl512b and aarch64.

Regards
Robin

gcc/ChangeLog:

        * tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Add
        sign_swap argument and try with swapped signedness.
        (vect_gather_scatter_fn_1): New helper function that checks for
        target support.
        (vect_check_gather_scatter): Pass sign_swap argument.
        * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset):
        Ditto.
        (get_load_store_type): Check for sign swap.
        (vectorizable_store): Handle sign swap.
        (vectorizable_load): Ditto.
        * tree-vectorizer.h (struct vect_load_store_data): Add type
        with swapped signedness.
        (struct gather_scatter_info):
        (vect_gather_scatter_fn_p): Add sign_swap argument.
---
gcc/tree-vect-data-refs.cc | 96 ++++++++++++++++++++++++++++----------
gcc/tree-vect-stmts.cc     | 44 +++++++++++++++--
gcc/tree-vectorizer.h      |  3 +-
3 files changed, 115 insertions(+), 28 deletions(-)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index a31ff93bbd3..2c6ae2239c8 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4425,32 +4425,17 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
  return opt_result::success ();
}

-/* Check whether we can use an internal function for a gather load
-   or scatter store.  READ_P is true for loads and false for stores.
-   MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
-   the type of the memory elements being loaded or stored.  OFFSET_TYPE
-   is the type of the offset that is being applied to the invariant
-   base address.  If OFFSET_TYPE is scalar the function chooses an
-   appropriate vector type for it.  SCALE is the amount by which the
-   offset should be multiplied *after* it has been converted to address width.
+/* Helper for vect_gather_scatter_fn_p that checks whether there is
+   a supported gather/scatter internal function with the given
+   parameters.  */

-   Return true if the function is supported, storing the function id in
-   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
-
-   If we can use gather and store the possible else values in ELSVALS.  */
-
-bool
-vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
+static bool
+vect_gather_scatter_fn_1 (vec_info *vinfo, bool read_p, bool masked_p,
                          tree vectype, tree memory_type, tree offset_type,
                          int scale, internal_fn *ifn_out,
                          tree *offset_vectype_out, vec<int> *elsvals)
{
-  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
  unsigned int element_bits = vector_element_bits (vectype);
-  if (element_bits != memory_bits)
-    /* For now the vector elements must be the same width as the
-       memory elements.  */
-    return false;

  /* Work out which function we need.  */
  internal_fn ifn, alt_ifn, alt_ifn2;
@@ -4528,6 +4513,66 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
    }
}

+/* Check whether we can use an internal function for a gather load
+   or scatter store.  READ_P is true for loads and false for stores.
+   MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
+   the type of the memory elements being loaded or stored.  OFFSET_TYPE
+   is the type of the offset that is being applied to the invariant
+   base address.  If OFFSET_TYPE is scalar the function chooses an
+   appropriate vector type for it.  SCALE is the amount by which the
+   offset should be multiplied *after* it has been converted to address width.
+
+   Return true if the function is supported, storing the function id in
+   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
+
+   If we can use gather and store the possible else values in ELSVALS.  */
+
+bool
+vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
+                         tree vectype, tree memory_type, tree offset_type,
+                         int scale, internal_fn *ifn_out,
+                         tree *offset_vectype_out, bool *sign_swap,
+                         vec<int> *elsvals)
+{
+  *sign_swap = false;
+  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
+  unsigned int element_bits = vector_element_bits (vectype);
+  if (element_bits != memory_bits)
+    /* For now the vector elements must be the same width as the
+       memory elements.  */
+    return false;
+
+  bool ok = vect_gather_scatter_fn_1 (vinfo, read_p, masked_p, vectype,
+                                     memory_type, offset_type, scale,
+                                     ifn_out, offset_vectype_out, elsvals);
+  if (ok)
+    return ok;
+
+  /* If the offset type is unsupported try a larger one with swapped
+     signedness.  If we started out with a signed type we can try a
+     pointer-sized unsigned type.  For an unsigned type a signed type
+     of twice the size is sufficient.  */
+  if (VECTOR_TYPE_P (offset_type))
+    offset_type = TREE_TYPE (offset_type);
+  if (!TYPE_OVERFLOW_WRAPS (offset_type))
+    offset_type = build_nonstandard_integer_type (POINTER_SIZE, 1);
+  else
+    {
+      int prec = TYPE_PRECISION (offset_type) * 2;
+      prec = std::min ((int) POINTER_SIZE, prec);
+      offset_type = build_nonstandard_integer_type (prec, 0);
+    }
+  if (vect_gather_scatter_fn_1 (vinfo, read_p, masked_p, vectype,
+                               memory_type, offset_type, scale,
+                               ifn_out, offset_vectype_out, elsvals))
+    {
+      *sign_swap = true;
+      return true;
+    }
+
+  return false;
+}
+
/* STMT_INFO is a call to an internal gather load or scatter store function.
   Describe the operation in INFO.  */

@@ -4678,6 +4723,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,

  base = fold_convert (sizetype, base);
  base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+  bool sign_swap;

  /* OFF at this point may be either a SSA_NAME or some tree expression
     from get_inner_reference.  Try to peel off loop invariants from it
@@ -4751,13 +4797,13 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
                                                masked_p, vectype, memory_type,
                                                signed_char_type_node,
                                                new_scale, &ifn,
-                                               &offset_vectype,
+                                               &offset_vectype, &sign_swap,
                                                elsvals)
                  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
                                                masked_p, vectype, memory_type,
                                                unsigned_char_type_node,
                                                new_scale, &ifn,
-                                               &offset_vectype,
+                                               &offset_vectype, &sign_swap,
                                                elsvals))
                break;
              scale = new_scale;
@@ -4781,7 +4827,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
              && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
                                           masked_p, vectype, memory_type,
                                           TREE_TYPE (off), scale, &ifn,
-                                          &offset_vectype, elsvals))
+                                          &offset_vectype, &sign_swap,
+                                          elsvals))
            break;

          if (TYPE_PRECISION (TREE_TYPE (op0))
@@ -4835,7 +4882,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
    {
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                     vectype, memory_type, offtype, scale,
-                                    &ifn, &offset_vectype, elsvals))
+                                    &ifn, &offset_vectype, &sign_swap,
+                                    elsvals))
        ifn = IFN_LAST;
      decl = NULL_TREE;
    }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 9fcc2fd0849..1ddb5871983 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1694,10 +1694,11 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
      /* See whether the target supports the operation with an offset
         no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
+      bool sign_swap = false;
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                     vectype, memory_type, offset_type, scale,
                                     &gs_info->ifn, &gs_info->offset_vectype,
-                                    elsvals)
+                                    &sign_swap, elsvals)
          || gs_info->ifn == IFN_LAST)
        continue;

@@ -1978,6 +1979,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
  int *misalignment = &ls->misalignment;
  internal_fn *lanes_ifn = &ls->lanes_ifn;
  vec<int> *elsvals = &ls->elsvals;
+  tree *sign_swap_offset_vectype = &ls->sign_swap_offset_vectype;
+  *sign_swap_offset_vectype = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -2038,13 +2041,18 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
      int scale = SLP_TREE_GS_SCALE (slp_node);
      tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
      tree tem;
+      bool sign_swap = false;
      if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
                                    masked_p, vectype,
                                    memory_type,
                                    offset_vectype, scale,
-                                   &ls->gs.ifn, &tem,
+                                   &ls->gs.ifn, &tem, &sign_swap,
                                    elsvals))
-       *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+       {
+         if (sign_swap)
+           *sign_swap_offset_vectype = tem;
+         *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+       }
      else if (vls_type == VLS_LOAD
               ? (targetm.vectorize.builtin_gather
                  && (ls->gs.decl
@@ -8623,6 +8631,11 @@ vectorizable_store (vec_info *vinfo,
            {
              if (costing_p)
                {
+                 if (ls.sign_swap_offset_vectype)
+                   inside_cost
+                     += record_stmt_cost (cost_vec, 1, vector_stmt,
+                                          slp_node, 0, vect_body);
+
                  unsigned int cnunits = vect_nunits_for_cost (vectype);
                  inside_cost
                    += record_stmt_cost (cost_vec, cnunits, scalar_store,
@@ -8634,6 +8647,16 @@ vectorizable_store (vec_info *vinfo,
                vec_offset = vec_offsets[j];

              tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
+             bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
+
+             if (!strided)
+               {
+                 gimple_seq stmts = NULL;
+                 if (ls.sign_swap_offset_vectype)
+                   vec_offset = gimple_convert
+                     (&stmts, ls.sign_swap_offset_vectype, vec_offset);
+                 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+               }

              if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
                {
@@ -10471,6 +10494,11 @@ vectorizable_load (vec_info *vinfo,
            {
              if (costing_p)
                {
+                 if (ls.sign_swap_offset_vectype)
+                   inside_cost
+                     += record_stmt_cost (cost_vec, 1, vector_stmt,
+                                          slp_node, 0, vect_body);
+
                  unsigned int cnunits = vect_nunits_for_cost (vectype);
                  inside_cost
                    = record_stmt_cost (cost_vec, cnunits, scalar_load,
@@ -10481,6 +10509,16 @@ vectorizable_load (vec_info *vinfo,
                vec_offset = vec_offsets[i];
              tree zero = build_zero_cst (vectype);
              tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
+             bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
+
+             if (!strided)
+               {
+                 gimple_seq stmts = NULL;
+                 if (ls.sign_swap_offset_vectype)
+                   vec_offset = gimple_convert
+                     (&stmts, ls.sign_swap_offset_vectype, vec_offset);
+                 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+               }

              if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
                {
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6872f8a03d2..7ea20f3ef4a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -287,6 +287,7 @@ struct vect_load_store_data : vect_data {
      tree decl;        // VMAT_GATHER_SCATTER_DECL
  } gs;
  tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
+  tree sign_swap_offset_vectype; // VMAT_GATHER_SCATTER_IFN
  auto_vec<int> elsvals;
};

@@ -2621,7 +2622,7 @@ extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
                                      tree, int, internal_fn *, tree *,
-                                     vec<int> * = nullptr);
+                                     bool *, vec<int> * = nullptr);
extern bool vect_check_gather_scatter (stmt_vec_info, tree,
                                       loop_vec_info, gather_scatter_info *,
                                       vec<int> * = nullptr);
--
2.51.0


Reply via email to