I didn't commit yet but instead set up an arm qemu environment.
There is indeed a problem after the patch: The widen lshift standard name
requires an immediate shift-count operand and that's what the arm pattern
implements.
However, unlike regular shifts, an IFN_VEC_WIDEN_LSHIFT is not lowered in
veclower. The previous tree code only ever used an immediate, so this
situation never arose.
The attached patch (applied on top) adds initial veclower handling for IFNs,
checking if the target supports a VOIDmode shift-count and replacing/lowering
it if so.
Is that a reasonable thing to do? For riscv I'd relax this restriction again
as our insn can handle vector, register, and immediate.
Bootstrapped on x86 and power10. Regtested on arm, rv64gcv, aarch64.
Regards
Robin
[PATCH] vect: Lower widen lshift vector shift count to scalar.
The introduction of IFN_VEC_WIDEN_LSHIFT caused test failures on arm
because the arm backend, correctly, does not accept vectors as shift
count for vec_widen_[us]_shiftl. Unlike with the widen-lshift tree code,
the shift count of the IFN is implicitly vectorized, similar to a regular
shift.
A regular vec-vec shift will be lowered to vec-imm if possible.
This patch does the same thing for IFN_VEC_WIDEN_LSHIFT.
gcc/ChangeLog:
* optabs-query.cc (can_shift_by_imm_p): New function.
* optabs-query.h (can_shift_by_imm_p): Declare.
* tree-vect-generic.cc (expand_vector_widen_lshift):
New function.
(expand_vector_ifn): New function that calls
expand_vector_widen_lshift.
(expand_vector_operations_1): Call expand_vector_ifn.
---
gcc/optabs-query.cc | 18 +++++++++++
gcc/optabs-query.h | 1 +
gcc/tree-vect-generic.cc | 68 ++++++++++++++++++++++++++++++++++++++--
3 files changed, 85 insertions(+), 2 deletions(-)
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 5335d0d8401..00d095d5d9c 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -849,3 +849,21 @@ can_implement_p (optab op, machine_mode mode)
{
return can_open_code_p (op, mode) || optab_libfunc (op, mode);
}
+
+/* Whether OP (a shift optab) can shift by an immediate. */
+
+bool
+can_shift_by_imm_p (optab op, machine_mode mode)
+{
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ enum insn_code icode = optab_handler (op, mode);
+ if (icode == CODE_FOR_nothing)
+ return false;
+
+ const struct insn_data_d *data = &insn_data[icode];
+ machine_mode shift_mode = data->operand[2].mode;
+
+ return shift_mode == VOIDmode;
+}
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index da98af337fa..1916486025f 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -176,6 +176,7 @@ opt_machine_mode get_absneg_bit_mode (optab, machine_mode,
bool can_vec_extract (machine_mode, machine_mode);
bool can_open_code_p (optab, machine_mode);
bool can_implement_p (optab, machine_mode);
+bool can_shift_by_imm_p (optab, machine_mode);
/* Version of find_widening_optab_handler_and_mode that operates on
specific mode types. */
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index 0e867293bd0..edba99a0203 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -2022,6 +2022,69 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
gsi_replace (gsi, g, false);
}
+/* Expand an internal function IFN_VEC_WIDEN_LSHIFT.
+ During vectorization we vectorized both arguments so the shift count is a
+ vector now. Check if the target supports an immediate as well and convert
+ the second argument if so. */
+
+static void
+expand_vector_widen_lshift (gcall *call)
+{
+ gcc_assert (gimple_call_num_args (call) == 2);
+
+ /* Check whether we have IFN_VEC_WIDEN_LSHIFT (arg1, {x,x,x,x})
+ where x could be a scalar variable or a constant.
+ Transform to IFN_VEC_WIDEN_LSHIFT (arg1, x). */
+ tree arg1 = gimple_call_arg (call, 0);
+ tree arg2 = gimple_call_arg (call, 1);
+
+ internal_fn ifn = gimple_call_internal_fn (call);
+
+ tree type = TREE_TYPE (arg1);
+ optab op = direct_internal_fn_optab (ifn, {type, type});
+
+ if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (arg2))
+ && can_shift_by_imm_p (op, TYPE_MODE (type)))
+ {
+ tree shift_count;
+ if ((shift_count = ssa_uniform_vector_p (arg2)) != NULL_TREE)
+ {
+ gimple_call_set_arg (call, 1, shift_count);
+ update_stmt (call);
+ return;
+ }
+ }
+
+ /* The standard name expects an immediate. If we couldn't convert the
+ argument something is wrong. */
+ gcc_unreachable ();
+}
+
+static void
+expand_vector_ifn (gimple_stmt_iterator *gsi)
+{
+ gimple *g = gsi_stmt (*gsi);
+ gcc_assert (is_gimple_call (g) && gimple_call_internal_p (g));
+
+ internal_fn ifn = gimple_call_internal_fn (g);
+
+ switch (ifn)
+ {
+ case IFN_VEC_CONVERT:
+ expand_vector_conversion (gsi);
+ break;
+ case IFN_VEC_WIDEN_LSHIFT:
+ case IFN_VEC_WIDEN_LSHIFT_HI:
+ case IFN_VEC_WIDEN_LSHIFT_LO:
+ case IFN_VEC_WIDEN_LSHIFT_EVEN:
+ case IFN_VEC_WIDEN_LSHIFT_ODD:
+ expand_vector_widen_lshift (as_a <gcall *> (g));
+ break;
+ default:
+ break;
+ }
+}
+
/* Process one statement. If we identify a vector operation, expand it. */
static void
@@ -2037,8 +2100,9 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
if (!stmt)
{
- if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
- expand_vector_conversion (gsi);
+ gimple *g = gsi_stmt (*gsi);
+ if (is_gimple_call (g) && gimple_call_internal_p (g))
+ expand_vector_ifn (gsi);
return;
}
--
2.51.0