Hi,
This patch adjusts vect_gather_scatter_fn_p to always check an offset
type with swapped signedness (vs. the original offset argument).
If the target supports the gather/scatter with the new offset type,
as well as the conversion of the offset to it, we now emit an explicit
offset conversion before the actual gather/scatter.
The relaxation is only done for the IFN path of gather/scatter; the
general idea is roughly:
- vect_gather_scatter_fn_p tries both signed and unsigned offset types
and sets supported_offset_vectype to the type that actually worked,
while offset_vectype_out remains the type that was requested.
- vect_check_gather_scatter works as before but uses the relaxed
vect_gather_scatter_fn_p.
- get_load_store_type sets ls_data->supported_offset_vectype if the
requested type wasn't supported but another one was.
- check_load_store_for_partial_vectors uses the
supported_offset_vectype in order to validate what get_load_store_type
determined.
- vectorizable_load/store emit a conversion (and cost it) if
ls_data->supported_offset_vectype is nonzero.
The new offset type is either of pointer size (if we started with a
signed offset) or twice the size of the original offset, capped at
pointer size (when the original one was unsigned).
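As a made-up example (illustrative GIMPLE only, all names and the
scale invented): on a target that only supports signed 64-bit gather
offsets, a gather with unsigned-int offsets would now get an explicit
conversion of its offset vector before the call, roughly

  vect_off2_1 = (vector(4) long int) vect_off_1;
  vect__2 = .MASK_GATHER_LOAD (base_3, vect_off2_1, 4, { 0, ... }, mask_5);

rather than the IFN path being rejected outright.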
Changes from v1:
- Check for conversion support.
- Rework/refactor vect_gather_scatter_fn_p.
I'm aware it's not exactly pretty, but I hope not to have complicated
things too much. Suggestions welcome, of course.
Bootstrapped on x86 and power10, regtested on aarch64 and rv64gcv_zvl512b.
Regards
Robin
gcc/ChangeLog:
* tree-vect-data-refs.cc (vect_gather_scatter_fn_p):
Use vect_gather_scatter_try_ifns.
(vect_gather_scatter_try_ifns): New function.
(vect_check_gather_scatter): Pass new argument to
vect_gather_scatter_fn_p.
* tree-vect-stmts.cc (vect_truncate_gather_scatter_offset):
Ditto.
(vect_use_grouped_gather): Ditto.
(get_load_store_type): Ditto.
(vectorizable_store): Cost and emit offset conversion.
(vectorizable_load): Ditto.
* tree-vectorizer.h (struct vect_load_store_data): Add
supported_offset_vectype.
(vect_gather_scatter_fn_p): Add argument.
---
gcc/tree-vect-data-refs.cc | 115 +++++++++++++++++++++++++++++--------
gcc/tree-vect-stmts.cc | 80 ++++++++++++++++++++++++--
gcc/tree-vectorizer.h | 6 +-
3 files changed, 169 insertions(+), 32 deletions(-)
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index c7941108887..9f88627b533 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4425,32 +4425,16 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
return opt_result::success ();
}
-/* Check whether we can use an internal function for a gather load
- or scatter store. READ_P is true for loads and false for stores.
- MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
- the type of the memory elements being loaded or stored. OFFSET_TYPE
- is the type of the offset that is being applied to the invariant
- base address. If OFFSET_TYPE is scalar the function chooses an
- appropriate vector type for it. SCALE is the amount by which the
- offset should be multiplied *after* it has been converted to address width.
+/* Helper for vect_gather_scatter_fn_p that checks if there is a supported
+ gather/scatter internal function with the given parameters. */
- Return true if the function is supported, storing the function id in
- *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
-
- If we can use gather and store the possible else values in ELSVALS. */
-
-bool
-vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
- tree vectype, tree memory_type, tree offset_type,
- int scale, internal_fn *ifn_out,
- tree *offset_vectype_out, vec<int> *elsvals)
+static bool
+vect_gather_scatter_try_ifns (vec_info *vinfo, bool read_p, bool masked_p,
+ tree vectype, tree memory_type, tree offset_type,
+ int scale, internal_fn *ifn_out,
+ tree *offset_vectype_out, vec<int> *elsvals)
{
- unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
unsigned int element_bits = vector_element_bits (vectype);
- if (element_bits != memory_bits)
- /* For now the vector elements must be the same width as the
- memory elements. */
- return false;
/* Work out which function we need. */
internal_fn ifn, alt_ifn, alt_ifn2;
@@ -4528,6 +4512,80 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
}
}
+/* Check whether we can use an internal function for a gather load
+ or scatter store. READ_P is true for loads and false for stores.
+ MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
+ the type of the memory elements being loaded or stored. OFFSET_TYPE
+ is the type of the offset that is being applied to the invariant
+ base address. If OFFSET_TYPE is scalar the function chooses an
+ appropriate vector type for it. SCALE is the amount by which the
+ offset should be multiplied *after* it has been converted to address width.
+
+ Return true if the function is supported, storing the function id in
+ *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
+ If we support an offset vector type with different signedness than
+ OFFSET_TYPE, store it in *SUPPORTED_OFFSET_VECTYPE.
+
+ If we can use gather/scatter and ELSVALS is nonnull, store the possible
+ else values in ELSVALS. */
+
+bool
+vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
+ tree vectype, tree memory_type, tree offset_type,
+ int scale, internal_fn *ifn_out,
+ tree *offset_vectype_out,
+ tree *supported_offset_vectype,
+ vec<int> *elsvals)
+{
+ *supported_offset_vectype = NULL_TREE;
+ unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
+ unsigned int element_bits = vector_element_bits (vectype);
+ if (element_bits != memory_bits)
+ /* For now the vector elements must be the same width as the
+ memory elements. */
+ return false;
+
+ /* Check if the current offset type and scale are supported directly. */
+ if (vect_gather_scatter_try_ifns (vinfo, read_p, masked_p, vectype,
+ memory_type, offset_type, scale,
+ ifn_out, offset_vectype_out, elsvals))
+ return true;
+
+ enum tree_code tmp;
+ tree offset_vectype_old = VECTOR_TYPE_P (offset_type)
+ ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);
+
+ /* If the offset type is unsupported, try a larger one with swapped
+ signedness. If we started out with a signed type we can try a
+ pointer-sized unsigned type. For an unsigned type a signed type
+ of twice the size (capped at pointer size) is sufficient. */
+ if (VECTOR_TYPE_P (offset_type))
+ offset_type = TREE_TYPE (offset_type);
+ if (!TYPE_OVERFLOW_WRAPS (offset_type))
+ offset_type = build_nonstandard_integer_type (POINTER_SIZE, 1);
+ else
+ {
+ int prec = TYPE_PRECISION (offset_type) * 2;
+ prec = std::min ((int) POINTER_SIZE, prec);
+ offset_type = build_nonstandard_integer_type (prec, 0);
+ }
+ if (vect_gather_scatter_try_ifns (vinfo, read_p, masked_p, vectype,
+ memory_type, offset_type, scale,
+ ifn_out, offset_vectype_out, elsvals)
+ && (tree_nop_conversion_p (*offset_vectype_out, offset_vectype_old)
+ || supportable_convert_operation (CONVERT_EXPR, *offset_vectype_out,
+ offset_vectype_old, &tmp)))
+
+ {
+ if (!tree_nop_conversion_p (*offset_vectype_out, offset_vectype_old))
+ *supported_offset_vectype = *offset_vectype_out;
+ *offset_vectype_out = offset_vectype_old;
+ return true;
+ }
+
+ return false;
+}
+
/* STMT_INFO is a call to an internal gather load or scatter store function.
Describe the operation in INFO. */
@@ -4678,6 +4736,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
base = fold_convert (sizetype, base);
base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+ tree tmp_offset_vectype;
/* OFF at this point may be either a SSA_NAME or some tree expression
from get_inner_reference. Try to peel off loop invariants from it
@@ -4752,12 +4811,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
signed_char_type_node,
new_scale, &ifn,
&offset_vectype,
+ &tmp_offset_vectype,
elsvals)
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
unsigned_char_type_node,
new_scale, &ifn,
&offset_vectype,
+ &tmp_offset_vectype,
elsvals))
break;
scale = new_scale;
@@ -4781,7 +4842,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
&& vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
TREE_TYPE (off), scale, &ifn,
- &offset_vectype, elsvals))
+ &offset_vectype,
+ &tmp_offset_vectype,
+ elsvals))
break;
if (TYPE_PRECISION (TREE_TYPE (op0))
@@ -4835,7 +4898,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
{
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offtype, scale,
- &ifn, &offset_vectype, elsvals))
+ &ifn, &offset_vectype,
+ &tmp_offset_vectype,
+ elsvals))
ifn = IFN_LAST;
decl = NULL_TREE;
}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index d8b1ee73b19..ff26461f6c5 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1505,6 +1505,14 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
: ls->strided_offset_vectype);
tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
int scale = SLP_TREE_GS_SCALE (slp_node);
+
+ /* The following "supported" checks just verify what we established in
+ get_load_store_type and don't try different offset types.
+ Therefore, off_vectype must be a supported offset type. If we
+ chose a different one, use that instead. */
+ if (ls->supported_offset_vectype)
+ off_vectype = ls->supported_offset_vectype;
+
if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
memory_type,
off_vectype, scale,
@@ -1697,10 +1705,12 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
/* See whether the target supports the operation with an offset
no narrower than OFFSET_TYPE. */
tree memory_type = TREE_TYPE (DR_REF (dr));
+ tree tmp_offset_vectype;
+ int supported_scale;
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offset_type, scale,
&gs_info->ifn, &gs_info->offset_vectype,
- elsvals)
+ &tmp_offset_vectype, elsvals)
|| gs_info->ifn == IFN_LAST)
continue;
@@ -1779,10 +1789,11 @@ vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
type must exist) so it is possible that even though a gather/scatter is
not available we still have a strided load/store. */
bool ok = false;
+ tree tmp_vectype;
if (vect_gather_scatter_fn_p
(loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn,
- &offset_vectype, elsvals))
+ &offset_vectype, &tmp_vectype, elsvals))
ok = true;
else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
elsvals))
@@ -2081,6 +2092,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
internal_fn *lanes_ifn = &ls->lanes_ifn;
vec<int> *elsvals = &ls->elsvals;
tree *ls_type = &ls->ls_type;
+ tree *supported_offset_vectype = &ls->supported_offset_vectype;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -2144,12 +2156,25 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
tree tem;
if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
- masked_p, vectype,
- memory_type,
+ masked_p, vectype, memory_type,
offset_vectype, scale,
&ls->gs.ifn, &tem,
- elsvals))
- *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+ supported_offset_vectype, elsvals))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "gather/scatter with required "
+ "offset vector type "
+ "%T and offset scale %d.\n",
+ offset_vectype, scale);
+ if (*supported_offset_vectype)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " target supports offset vector type %T.\n",
+ *supported_offset_vectype);
+ }
+ *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+ }
else if (vls_type == VLS_LOAD
? (targetm.vectorize.builtin_gather
&& (ls->gs.decl
@@ -2413,6 +2438,19 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
masked_p, &gs_info, elsvals,
group_size, single_element_p))
{
+ /* vect_use_strided_gather_scatters_p does not save the actually
+ supported scale and offset type, so do that here.
+ We need it later in check_load_store_for_partial_vectors
+ where we only check if the given internal function is supported
+ (to choose whether to use the IFN, LEGACY, or EMULATED flavor
+ of gather/scatter) and don't re-do the full analysis. */
+ tree tmp;
+ gcc_assert (vect_gather_scatter_fn_p
+ (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
+ gs_info.memory_type, TREE_TYPE (gs_info.offset),
+ gs_info.scale, &gs_info.ifn,
+ &tmp, supported_offset_vectype, elsvals));
+
SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
SLP_TREE_GS_BASE (slp_node) = error_mark_node;
ls->gs.ifn = gs_info.ifn;
@@ -8746,6 +8784,11 @@ vectorizable_store (vec_info *vinfo,
{
if (costing_p)
{
+ if (ls.supported_offset_vectype)
+ inside_cost
+ += record_stmt_cost (cost_vec, 1, vector_stmt,
+ slp_node, 0, vect_body);
+
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
+= record_stmt_cost (cost_vec, cnunits, scalar_store,
@@ -8757,6 +8800,16 @@ vectorizable_store (vec_info *vinfo,
vec_offset = vec_offsets[j];
tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
+ bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
+
+ /* Perform the offset conversion if necessary. */
+ if (!strided && ls.supported_offset_vectype)
+ {
+ gimple_seq stmts = NULL;
+ vec_offset = gimple_convert
+ (&stmts, ls.supported_offset_vectype, vec_offset);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ }
if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
{
@@ -10628,6 +10681,11 @@ vectorizable_load (vec_info *vinfo,
{
if (costing_p)
{
+ if (ls.supported_offset_vectype)
+ inside_cost
+ += record_stmt_cost (cost_vec, 1, vector_stmt,
+ slp_node, 0, vect_body);
+
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
= record_stmt_cost (cost_vec, cnunits, scalar_load,
@@ -10638,6 +10696,16 @@ vectorizable_load (vec_info *vinfo,
vec_offset = vec_offsets[i];
tree zero = build_zero_cst (vectype);
tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
+ bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
+
+ /* Perform the offset conversion if necessary. */
+ if (!strided && ls.supported_offset_vectype)
+ {
+ gimple_seq stmts = NULL;
+ vec_offset = gimple_convert
+ (&stmts, ls.supported_offset_vectype, vec_offset);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ }
if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
{
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 52bc0d672bf..39d67fcd081 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -289,6 +289,10 @@ struct vect_load_store_data : vect_data {
} gs;
tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
tree ls_type; // VMAT_GATHER_SCATTER_IFN
+ /* This is set to a supported offset vector type if we don't support the
+ originally requested offset type. In that case there will be an
+ additional offset conversion before the gather/scatter. */
+ tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN
auto_vec<int> elsvals;
unsigned n_perms; // SLP_TREE_LOAD_PERMUTATION
};
@@ -2595,7 +2599,7 @@ extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
tree, int, internal_fn *, tree *,
- vec<int> * = nullptr);
+ tree *, vec<int> * = nullptr);
extern bool vect_check_gather_scatter (stmt_vec_info, tree,
loop_vec_info, gather_scatter_info *,
vec<int> * = nullptr);
--
2.51.0