On August 6, 2021 3:47:44 PM GMT+02:00, Richard Sandiford
<[email protected]> wrote:
>Richard Biener via Gcc-patches <[email protected]> writes:
>> This removes the cost part of vect_worthwhile_without_simd_p, retaining
>> only the correctness bits. The reason is that the cost heuristic
>> do not properly account for SLP plus the check whether "without simd"
>> applies misfires for AVX512 mask vectors at the moment, leading to
>> missed vectorizations there.
>>
>> Any costing decision should take place in the cost modeling, no
>> single stmt is to disable all vectorization on its own.
>>
>> Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed to trunk.
>>
>> 2021-08-06 Richard Biener <[email protected]>
>>
>> PR tree-optimization/101801
>> * tree-vectorizer.h (vect_worthwhile_without_simd_p): Rename...
>> (vect_can_vectorize_without_simd_p): ... to this.
>> * tree-vect-loop.c (vect_worthwhile_without_simd_p): Rename...
>> (vect_can_vectorize_without_simd_p): ... to this and fold
>> in vect_min_worthwhile_factor.
>> (vect_min_worthwhile_factor): Remove.
>> (vectorizable_reduction): Adjust and remove the cost part.
>> * tree-vect-stmts.c (vectorizable_shift): Likewise.
>> (vectorizable_operation): Likewise.
>> ---
>> gcc/tree-vect-loop.c | 43 +++++++------------------------------------
>> gcc/tree-vect-stmts.c | 26 ++------------------------
>> gcc/tree-vectorizer.h | 2 +-
>> 3 files changed, 10 insertions(+), 61 deletions(-)
>>
>> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
>> index 1e21fe6b13d..37c7daa7f9e 100644
>> --- a/gcc/tree-vect-loop.c
>> +++ b/gcc/tree-vect-loop.c
>> @@ -7227,24 +7227,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
>> if (dump_enabled_p ())
>> dump_printf (MSG_NOTE, "op not supported by target.\n");
>> if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
>> - || !vect_worthwhile_without_simd_p (loop_vinfo, code))
>> + || !vect_can_vectorize_without_simd_p (code))
>> ok = false;
>> else
>> if (dump_enabled_p ())
>> dump_printf (MSG_NOTE, "proceeding using word mode.\n");
>> }
>>
>> - /* Worthwhile without SIMD support? */
>> - if (ok
>> - && !VECTOR_MODE_P (TYPE_MODE (vectype_in))
>> - && !vect_worthwhile_without_simd_p (loop_vinfo, code))
>> - {
>> - if (dump_enabled_p ())
>> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> - "not worthwhile without SIMD support.\n");
>> - ok = false;
>> - }
>> -
>> /* lane-reducing operations have to go through
>> vect_transform_reduction.
>> For the other cases try without the single cycle optimization. */
>> if (!ok)
>> @@ -7948,46 +7937,28 @@ vectorizable_phi (vec_info *,
>> }
>>
>>
>> -/* Function vect_min_worthwhile_factor.
>> +/* Return true if we can emulate CODE on an integer mode representation
>> + of a vector. */
>>
>> - For a loop where we could vectorize the operation indicated by CODE,
>> - return the minimum vectorization factor that makes it worthwhile
>> - to use generic vectors. */
>> -static unsigned int
>> -vect_min_worthwhile_factor (enum tree_code code)
>> +bool
>> +vect_can_vectorize_without_simd_p (tree_code code)
>> {
>> switch (code)
>> {
>> case PLUS_EXPR:
>> case MINUS_EXPR:
>> case NEGATE_EXPR:
>> - return 4;
>> -
>> case BIT_AND_EXPR:
>> case BIT_IOR_EXPR:
>> case BIT_XOR_EXPR:
>> case BIT_NOT_EXPR:
>> - return 2;
>> + return true;
>>
>> default:
>> - return INT_MAX;
>> + return false;
>> }
>> }
>>
>> -/* Return true if VINFO indicates we are doing loop vectorization and if
>> - it is worth decomposing CODE operations into scalar operations for
>> - that loop's vectorization factor. */
>> -
>> -bool
>> -vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
>> -{
>> - loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>> - unsigned HOST_WIDE_INT value;
>> - return (loop_vinfo
>> - && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value)
>> - && value >= vect_min_worthwhile_factor (code));
>> -}
>> -
>> /* Function vectorizable_induction
>>
>> Check if STMT_INFO performs an induction computation that can be
>> vectorized.
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 94bdb74ea8d..5b94d41e292 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -5685,24 +5685,13 @@ vectorizable_shift (vec_info *vinfo,
>> /* Check only during analysis. */
>> if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
>> || (!vec_stmt
>> - && !vect_worthwhile_without_simd_p (vinfo, code)))
>> + && !vect_can_vectorize_without_simd_p (code)))
>> return false;
>> if (dump_enabled_p ())
>> dump_printf_loc (MSG_NOTE, vect_location,
>> "proceeding using word mode.\n");
>> }
>>
>> - /* Worthwhile without SIMD support? Check only during analysis. */
>> - if (!vec_stmt
>> - && !VECTOR_MODE_P (TYPE_MODE (vectype))
>> - && !vect_worthwhile_without_simd_p (vinfo, code))
>> - {
>> - if (dump_enabled_p ())
>> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> - "not worthwhile without SIMD support.\n");
>> - return false;
>> - }
>> -
>> if (!vec_stmt) /* transformation not required. */
>> {
>> if (slp_node
>
>It looks from vect_min_worthwhile_factor like this shift stuff
>was/is effectively dead code. Maybe it started life as a copy of
>vectorizable_operation or something?
Yeah, looks like so. I did trace back the original introduction and there was
no vectirizable_shift at that point.
Richard.
Richard.
>Thanks,
>Richard
>
>> @@ -6094,24 +6083,13 @@ vectorizable_operation (vec_info *vinfo,
>> "op not supported by target.\n");
>> /* Check only during analysis. */
>> if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
>> - || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
>> + || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
>> return false;
>> if (dump_enabled_p ())
>> dump_printf_loc (MSG_NOTE, vect_location,
>> "proceeding using word mode.\n");
>> }
>>
>> - /* Worthwhile without SIMD support? Check only during analysis. */
>> - if (!VECTOR_MODE_P (vec_mode)
>> - && !vec_stmt
>> - && !vect_worthwhile_without_simd_p (vinfo, code))
>> - {
>> - if (dump_enabled_p ())
>> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> - "not worthwhile without SIMD support.\n");
>> - return false;
>> - }
>> -
>> int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
>> vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) :
>> NULL);
>> internal_fn cond_fn = get_conditional_internal_fn (code);
>> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
>> index 5571b3cce3b..de0ecf86478 100644
>> --- a/gcc/tree-vectorizer.h
>> +++ b/gcc/tree-vectorizer.h
>> @@ -2061,7 +2061,7 @@ extern bool vectorizable_lc_phi (loop_vec_info,
>> stmt_vec_info,
>> gimple **, slp_tree);
>> extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **,
>> slp_tree,
>> stmt_vector_for_cost *);
>> -extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
>> +extern bool vect_can_vectorize_without_simd_p (tree_code);
>> extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
>> stmt_vector_for_cost *,
>> stmt_vector_for_cost *,