On Tue, 2 Jan 2024, Tamar Christina wrote:
> Hi All,
>
> I was generating the vector reverse mask without checking if the target
> actually supported such an operation.
>
> It also seems like more targets implement VEC_EXTRACT than permute on mask
> registers.
>
> So this adds a check for IFN_VEC_EXTRACT support when required and changes
> the select first code to use it.
>
> This is good for now since masks always come from whilelo. But in the future
> when masks can come from other sources we will need the old code back.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues with --enable-checking=release --enable-lto
> --with-build-config=bootstrap-O3 --enable-checking=yes,rtl,extra.
> tested on cross cc1 for amdgcn-amdhsa and issue fixed.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/113199
> * tree-vect-loop.cc (vectorizable_live_operation_1): Use
> IFN_VEC_EXTRACT.
> (vectorizable_live_operation): Check for IFN_VEC_EXTRACT support.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/113199
> * gcc.target/gcn/pr113199.c: New test.
>
> --- inline copy of patch --
> diff --git a/gcc/testsuite/gcc.target/gcn/pr113199.c
> b/gcc/testsuite/gcc.target/gcn/pr113199.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..8a641e5536e80e207ca0163cac66c0f4f6ca93f7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/gcn/pr113199.c
> @@ -0,0 +1,44 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2" } */
> +
> +typedef long unsigned int size_t;
> +typedef int wchar_t;
> +struct tm
> +{
> + int tm_mon;
> + int tm_year;
> +};
> +int abs (int);
> +struct lc_time_T { const char *month[12]; };
> +struct __locale_t * __get_current_locale (void) { }
> +const struct lc_time_T * __get_time_locale (struct __locale_t *locale) { }
> +const wchar_t * __ctloc (wchar_t *buf, const char *elem, size_t *len_ret) {
> return buf; }
> +size_t
> +__strftime (wchar_t *s, size_t maxsize, const wchar_t *format,
> + const struct tm *tim_p, struct __locale_t *locale)
> +{
> + size_t count = 0;
> + const wchar_t *ctloc;
> + wchar_t ctlocbuf[256];
> + size_t i, ctloclen;
> + const struct lc_time_T *_CurrentTimeLocale = __get_time_locale (locale);
> + {
> + switch (*format)
> + {
> + case L'B':
> + (ctloc = __ctloc (ctlocbuf, _CurrentTimeLocale->month[tim_p->tm_mon],
> &ctloclen));
> + for (i = 0; i < ctloclen; i++)
> + {
> + if (count < maxsize - 1)
> + s[count++] = ctloc[i];
> + else
> + return 0;
> + {
> + int century = tim_p->tm_year >= 0
> + ? tim_p->tm_year / 100 + 1900 / 100
> + : abs (tim_p->tm_year + 1900) / 100;
> + }
> + }
> + }
> + }
> +}
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index
> 37f1be1101ffae779214056a0886411e0683e887..5aa92e67444e7aacf458fffa1428f1983c482374
> 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10648,36 +10648,18 @@ vectorizable_live_operation_1 (loop_vec_info
> loop_vinfo,
> &LOOP_VINFO_MASKS (loop_vinfo),
> 1, vectype, 0);
> tree scalar_res;
> + gimple_seq_add_seq (&stmts, tem);
>
> /* For an inverted control flow with early breaks we want EXTRACT_FIRST
> - instead of EXTRACT_LAST. Emulate by reversing the vector and mask. */
> + instead of EXTRACT_LAST. For now since the mask always comes from a
> + WHILELO we can get the first element ignoring the mask since CLZ of the
> + mask will always be zero. */
> if (restart_loop && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> - {
> - /* First create the permuted mask. */
> - tree perm_mask = perm_mask_for_reverse (TREE_TYPE (mask));
> - tree perm_dest = copy_ssa_name (mask);
> - gimple *perm_stmt
> - = gimple_build_assign (perm_dest, VEC_PERM_EXPR, mask,
> - mask, perm_mask);
> - vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt,
> - &gsi);
> - mask = perm_dest;
> -
> - /* Then permute the vector contents. */
> - tree perm_elem = perm_mask_for_reverse (vectype);
> - perm_dest = copy_ssa_name (vec_lhs_phi);
> - perm_stmt
> - = gimple_build_assign (perm_dest, VEC_PERM_EXPR, vec_lhs_phi,
> - vec_lhs_phi, perm_elem);
> - vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt,
> - &gsi);
> - vec_lhs_phi = perm_dest;
> - }
> -
> - gimple_seq_add_seq (&stmts, tem);
> -
> - scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
> - mask, vec_lhs_phi);
> + scalar_res = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
> + vec_lhs_phi, bitstart);
So bitstart is always zero? I wonder why using CFN_VEC_EXTRACT over
BIT_FIELD_REF here which wouldn't need any additional target support.
> + else
> + scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
> + mask, vec_lhs_phi);
>
> /* Convert the extracted vector element to the scalar type. */
> new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
> @@ -10852,9 +10834,25 @@ vectorizable_live_operation (vec_info *vinfo,
> stmt_vec_info stmt_info,
> gcc_assert (ncopies == 1 && !slp_node);
> if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
> OPTIMIZE_FOR_SPEED))
> - vect_record_loop_mask (loop_vinfo,
> - &LOOP_VINFO_MASKS (loop_vinfo),
> - 1, vectype, NULL);
> + {
> + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
> + && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> + && !direct_internal_fn_supported_p (IFN_EXTRACT_LAST,
> + vectype,
> + OPTIMIZE_FOR_SPEED))
I don't quite understand this part - this is guarded by
direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
OPTIMIZE_FOR_SPEED)
unless I'm mis-matching this means the checked
!direct_internal_fn_supported_p condition is always false?
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "can't operate on partial vectors "
> + "because the target doesn't support extract "
> + "first reduction.\n");
> + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
> + }
> + else
> + vect_record_loop_mask (loop_vinfo,
> + &LOOP_VINFO_MASKS (loop_vinfo),
> + 1, vectype, NULL);
> + }
> else if (can_vec_extract_var_idx_p (
> TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype))))
> vect_record_loop_len (loop_vinfo,
>
>
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)