On Fri, Sep 18, 2020 at 8:18 AM Xiong Hu Luo <[email protected]> wrote:
>
> This patch enables transformation from ARRAY_REF(VIEW_CONVERT_EXPR) to
> VEC_SET internal function in gimple-isel pass if target supports
> vec_set with variable index by checking can_vec_set_var_idx_p.
>
> gcc/ChangeLog:
>
> 2020-09-18 Xionghu Luo <[email protected]>
>
> * gimple-isel.cc (gimple_expand_vec_set_expr): New function.
> (gimple_expand_vec_cond_exprs): Call gimple_expand_vec_set_expr.
> * internal-fn.c (vec_set_direct): New define.
> (expand_vec_set_optab_fn): New function.
> (direct_vec_set_optab_supported_p): New define.
> * internal-fn.def (VEC_SET): New DEF_INTERNAL_OPTAB_FN.
> * optabs.c (can_vec_set_var_idx_p): New function.
> * optabs.h (can_vec_set_var_idx_p): New declare.
> ---
> gcc/gimple-isel.cc | 116 +++++++++++++++++++++++++++++++++++++++++++-
> gcc/internal-fn.c | 36 ++++++++++++++
> gcc/internal-fn.def | 2 +
> gcc/optabs.c | 17 +++++++
> gcc/optabs.h | 3 ++
> 5 files changed, 172 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index b330cf4c20e..bc61e2895be 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -35,6 +35,80 @@ along with GCC; see the file COPYING3. If not see
> #include "tree-cfg.h"
> #include "bitmap.h"
> #include "tree-ssa-dce.h"
> +#include "fold-const.h"
> +#include "gimple-fold.h"
> +#include "memmodel.h"
> +#include "optabs.h"
> +
> +/* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
> + internal function based on vector type of selected expansion.
> + i.e.:
> + VIEW_CONVERT_EXPR<int[4]>(u)[_1] = = i_4(D);
> + =>
> + _7 = u;
> + _8 = .VEC_SET (_7, i_4(D), _1);
> + u = _8; */
> +
> +static gimple *
> +gimple_expand_vec_set_expr (gimple_stmt_iterator *gsi)
> +{
> + enum tree_code code;
> + gcall *new_stmt = NULL;
> + gassign *ass_stmt = NULL;
> +
> + /* Only consider code == GIMPLE_ASSIGN. */
> + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
> + if (!stmt)
> + return NULL;
> +
> + code = TREE_CODE (gimple_assign_lhs (stmt));
do the lhs = gimple_assign_lhs (stmt) before and elide cond,
putting the TREE_CODE into the if below.
> + if (code != ARRAY_REF)
> + return NULL;
> +
> + tree lhs = gimple_assign_lhs (stmt);
> + tree val = gimple_assign_rhs1 (stmt);
> +
> + tree type = TREE_TYPE (lhs);
> + tree op0 = TREE_OPERAND (lhs, 0);
> + if (TREE_CODE (op0) == VIEW_CONVERT_EXPR
So I think we want to have an exact structural match first here, so
if (TREE_CODE (op0) == VIEW_CONVERT_EXPR
&& DECL_P (TREE_OPERAND (op0, 0))
&& VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0)))
&& TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE
(TREE_TYPE (TREE_OPERAND (op0, 0))))
which means we're sure to do an element extract from a vector type
(and we know all vector types have sane element types).
> + && tree_fits_uhwi_p (TYPE_SIZE (type)))
> + {
> + tree pos = TREE_OPERAND (lhs, 1);
> + tree view_op0 = TREE_OPERAND (op0, 0);
> + machine_mode outermode = TYPE_MODE (TREE_TYPE (view_op0));
> + scalar_mode innermode = GET_MODE_INNER (outermode);
> + tree_code code = TREE_CODE (TREE_TYPE(view_op0));
> + if (!is_global_var (view_op0) && code == VECTOR_TYPE
> + && tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (view_op0)))
why did you need those TYPE_SIZE checks? As said earlier
you want !TREE_ADDRESSABLE (view_op0) and eventually
the stronger auto_var_in_fn_p (view_op0, cfun) rather than !is_global_var.
> + && can_vec_set_var_idx_p (code, outermode, innermode,
> + TYPE_MODE (TREE_TYPE (pos))))
> + {
> + location_t loc = gimple_location (stmt);
> + tree var_src = make_ssa_name (TREE_TYPE (view_op0));
> + tree var_dst = make_ssa_name (TREE_TYPE (view_op0));
> +
> + ass_stmt = gimple_build_assign (var_src, view_op0);
> + gimple_set_vuse (ass_stmt, gimple_vuse (stmt));
> + gimple_set_location (ass_stmt, loc);
> + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT);
> +
> + new_stmt
> + = gimple_build_call_internal (IFN_VEC_SET, 3, var_src, val, pos);
> + gimple_call_set_lhs (new_stmt, var_dst);
> + gimple_set_location (new_stmt, loc);
> + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
> +
> + ass_stmt = gimple_build_assign (view_op0, var_dst);
> + gimple_set_location (ass_stmt, loc);
> + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT);
> +
> + gimple_move_vops (ass_stmt, stmt);
> + gsi_remove (gsi, true);
> + }
> + }
> +
> + return ass_stmt;
> +}
>
> /* Expand all VEC_COND_EXPR gimple assignments into calls to internal
> function based on type of selected expansion. */
> @@ -187,8 +261,25 @@ gimple_expand_vec_cond_exprs (void)
> {
> for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> {
> - gimple *g = gimple_expand_vec_cond_expr (&gsi,
> - &vec_cond_ssa_name_uses);
> + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi));
> + if (!stmt)
> + continue;
> +
> + enum tree_code code;
> + gimple *g = NULL;
> + code = gimple_assign_rhs_code (stmt);
> + switch (code)
> + {
> + case VEC_COND_EXPR:
> + g = gimple_expand_vec_cond_expr (&gsi, &vec_cond_ssa_name_uses);
> + break;
> + case ARRAY_REF:
> + /* TODO: generate IFN for vec_extract with variable index. */
so why not do this here?
> + break;
> + default:
> + break;
> + }
> +
> if (g != NULL)
> {
> tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
> @@ -204,6 +295,27 @@ gimple_expand_vec_cond_exprs (void)
>
> simple_dce_from_worklist (dce_ssa_names);
>
> + FOR_EACH_BB_FN (bb, cfun)
but in a separate loop?
> + {
> + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> + {
> + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi));
> + if (!stmt)
> + continue;
> +
> + enum tree_code code;
> + code = TREE_CODE (gimple_assign_lhs (stmt));
> + switch (code)
> + {
> + case ARRAY_REF:
> + gimple_expand_vec_set_expr (&gsi);
> + break;
> + default:
> + break;
> + }
> + }
> + }
> +
> return 0;
> }
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 8efc77d986b..36837381c04 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -115,6 +115,7 @@ init_internal_fns ()
> #define vec_condeq_direct { 0, 0, false }
> #define scatter_store_direct { 3, 1, false }
> #define len_store_direct { 3, 3, false }
> +#define vec_set_direct { 3, 3, false }
> #define unary_direct { 0, 0, true }
> #define binary_direct { 0, 0, true }
> #define ternary_direct { 0, 0, true }
> @@ -2658,6 +2659,40 @@ expand_vect_cond_mask_optab_fn (internal_fn, gcall
> *stmt, convert_optab optab)
>
> #define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn
>
> +static void
> +expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
all new functions require a function level comment
> +{
> + tree lhs = gimple_call_lhs (stmt);
> + tree op0 = gimple_call_arg (stmt, 0);
> + tree op1 = gimple_call_arg (stmt, 1);
> + tree op2 = gimple_call_arg (stmt, 2);
> + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> + rtx src = expand_expr (op0, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +
> + machine_mode outermode = TYPE_MODE (TREE_TYPE (op0));
> + scalar_mode innermode = GET_MODE_INNER (outermode);
> +
> + rtx value = expand_expr (op1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
> + rtx pos = expand_expr (op2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
> +
> + class expand_operand ops[3];
> + enum insn_code icode = optab_handler (optab, outermode);
> +
> + if (icode != CODE_FOR_nothing)
> + {
> + pos = convert_to_mode (E_SImode, pos, 0);
> +
> + create_fixed_operand (&ops[0], src);
> + create_input_operand (&ops[1], value, innermode);
> + create_input_operand (&ops[2], pos, GET_MODE (pos));
> + if (maybe_expand_insn (icode, 3, ops))
> + {
> + emit_move_insn (target, src);
I think you need to assert that we end up here.
> + return;
> + }
> + }
> +}
> +
> static void
> expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
> {
> @@ -3253,6 +3288,7 @@ multi_vector_optab_supported_p (convert_optab optab,
> tree_pair types,
> #define direct_fold_left_optab_supported_p direct_optab_supported_p
> #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
> #define direct_check_ptrs_optab_supported_p direct_optab_supported_p
> +#define direct_vec_set_optab_supported_p direct_optab_supported_p
>
> /* Return the optab used by internal function FN. */
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 13e60828fcf..e6cfe1b6159 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -145,6 +145,8 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
> DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
> DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
>
> +DEF_INTERNAL_OPTAB_FN (VEC_SET, 0, vec_set, vec_set)
> +
> DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store)
>
> DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
> diff --git a/gcc/optabs.c b/gcc/optabs.c
> index 184827fdf4e..c8125670d2d 100644
> --- a/gcc/optabs.c
> +++ b/gcc/optabs.c
> @@ -3841,6 +3841,23 @@ can_vcond_compare_p (enum rtx_code code, machine_mode
> value_mode,
> && insn_operand_matches (icode, 3, test);
> }
>
> +bool
> +can_vec_set_var_idx_p (enum tree_code code, machine_mode vec_mode,
> + machine_mode value_mode, machine_mode idx_mode)
toplevel comment missing
> +{
> + gcc_assert (code == VECTOR_TYPE);
what's the point of pasing 'code' here then? Since the optab only has a single
mode, the vector mode, the value_mode is redundant as well. And I guess
we might want to handle "arbitrary" index modes? That is, the .md expanders
should not restrict its mode - I guess it simply uses VOIDmode at the moment
(for integer constants). Not sure how to best do this without an explicit mode
in the optab ...
> +
> + rtx reg1 = alloca_raw_REG (vec_mode, LAST_VIRTUAL_REGISTER + 1);
> + rtx reg2 = alloca_raw_REG (value_mode, LAST_VIRTUAL_REGISTER + 2);
> + rtx reg3 = alloca_raw_REG (idx_mode, LAST_VIRTUAL_REGISTER + 3);
> +
> + enum insn_code icode = optab_handler (vec_set_optab, vec_mode);
> +
> + return icode != CODE_FOR_nothing && insn_operand_matches (icode, 0, reg1)
> + && insn_operand_matches (icode, 1, reg2)
> + && insn_operand_matches (icode, 2, reg3);
> +}
> +
> /* This function is called when we are going to emit a compare instruction
> that
> compares the values found in X and Y, using the rtl operator COMPARISON.
>
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index 7c2ec257cb0..c018d127756 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -249,6 +249,9 @@ extern int can_compare_p (enum rtx_code, machine_mode,
> VALUE_MODE. */
> extern bool can_vcond_compare_p (enum rtx_code, machine_mode, machine_mode);
>
> +extern bool can_vec_set_var_idx_p (enum tree_code, machine_mode,
> machine_mode,
> + machine_mode);
> +
> extern rtx prepare_operand (enum insn_code, rtx, int, machine_mode,
> machine_mode, int);
> /* Emit a pair of rtl insns to compare two rtx's and to jump
> --
> 2.27.0.90.geebb51ba8c
>