On Thu, 24 Aug 2023, Juzhe-Zhong wrote:

> Hi.
> 
> This patch applies LEN_FOLD_EXTRACT_LAST in the loop vectorizer.
> 
> Consider the following case:
> #include <stdint.h>
> 
> #define N 32
> 
> /* Simple condition reduction.  */
> 
> int __attribute__ ((noinline, noclone))
> condition_reduction (int *a, int min_v)
> {
>   int last = 66; /* High start value.  */
> 
>   for (int i = 0; i < N; i++)
>     if (a[i] < min_v)
>       last = i;
> 
>   return last;
> }
> 
> With this patch, we can generate the following IR:
> 
>   _44 = .SELECT_VL (ivtmp_42, POLY_INT_CST [4, 4]);
>   _34 = vect_vec_iv_.5_33 + { POLY_INT_CST [4, 4], ... };
>   ivtmp_36 = _44 * 4;
>   vect__4.8_39 = .MASK_LEN_LOAD (vectp_a.6_37, 32B, { -1, ... }, _44, 0);
> 
>   mask__11.9_41 = vect__4.8_39 < vect_cst__40;
>   last_5 = .LEN_FOLD_EXTRACT_LAST (last_14, mask__11.9_41, vect_vec_iv_.5_33,
>                                    _44, 0);
>   ...

LGTM.

Thanks,
Richard.

> gcc/ChangeLog:
> 
>         * tree-vect-loop.cc (vectorizable_reduction): Apply
>         LEN_FOLD_EXTRACT_LAST.
>         * tree-vect-stmts.cc (vectorizable_condition): Ditto.
> 
> ---
>  gcc/tree-vect-loop.cc  |  7 ++++--
>  gcc/tree-vect-stmts.cc | 52 ++++++++++++++++++++++++++++++++++++------
>  2 files changed, 50 insertions(+), 9 deletions(-)
> 
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 1cd6c291377..ebee8037e02 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -7494,8 +7494,11 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
>       }
>  
>        if (reduc_chain_length == 1
> -       && direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
> -                                          vectype_in, OPTIMIZE_FOR_SPEED))
> +       && (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, vectype_in,
> +                                           OPTIMIZE_FOR_SPEED)
> +           || direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
> +                                              vectype_in,
> +                                              OPTIMIZE_FOR_SPEED)))
>       {
>         if (dump_enabled_p ())
>           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 413a88750d6..be9f3a280bd 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -11740,8 +11740,17 @@ vectorizable_condition (vec_info *vinfo,
>         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
>       {
>         if (reduction_type == EXTRACT_LAST_REDUCTION)
> -         vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
> -                                ncopies * vec_num, vectype, NULL);
> +         {
> +           if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
> +                                               vectype, OPTIMIZE_FOR_SPEED))
> +             vect_record_loop_len (loop_vinfo,
> +                                   &LOOP_VINFO_LENS (loop_vinfo),
> +                                   ncopies * vec_num, vectype, 1);
> +           else
> +             vect_record_loop_mask (loop_vinfo,
> +                                    &LOOP_VINFO_MASKS (loop_vinfo),
> +                                    ncopies * vec_num, vectype, NULL);
> +         }
>         /* Extra inactive lanes should be safe for vect_nested_cycle.  */
>         else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
>           {
> @@ -11772,7 +11781,13 @@ vectorizable_condition (vec_info *vinfo,
>       mask to the condition, or to its inverse.  */
>  
>    vec_loop_masks *masks = NULL;
> -  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
> +  vec_loop_lens *lens = NULL;
> +  if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
> +    {
> +      if (reduction_type == EXTRACT_LAST_REDUCTION)
> +     lens = &LOOP_VINFO_LENS (loop_vinfo);
> +    }
> +  else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
>      {
>        if (reduction_type == EXTRACT_LAST_REDUCTION)
>       masks = &LOOP_VINFO_MASKS (loop_vinfo);
> @@ -11910,7 +11925,8 @@ vectorizable_condition (vec_info *vinfo,
>        /* Force vec_compare to be an SSA_NAME rather than a comparison,
>        in cases where that's necessary.  */
>  
> -      if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
> +      tree len = NULL_TREE, bias = NULL_TREE;
> +      if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
>       {
>         if (!is_gimple_val (vec_compare))
>           {
> @@ -11931,6 +11947,23 @@ vectorizable_condition (vec_info *vinfo,
>             vec_compare = vec_compare_name;
>           }
>  
> +       if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
> +                                           vectype, OPTIMIZE_FOR_SPEED))
> +         {
> +           if (lens)
> +             {
> +               len = vect_get_loop_len (loop_vinfo, gsi, lens,
> +                                        vec_num * ncopies, vectype, i, 1);
> +               signed char biasval
> +                 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> +               bias = build_int_cst (intQI_type_node, biasval);
> +             }
> +           else
> +             {
> +               len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> +               bias = build_int_cst (intQI_type_node, 0);
> +             }
> +         }
>         if (masks)
>           {
>             tree loop_mask
> @@ -11950,9 +11983,14 @@ vectorizable_condition (vec_info *vinfo,
>       {
>         gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
>         tree lhs = gimple_get_lhs (old_stmt);
> -       new_stmt = gimple_build_call_internal
> -           (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
> -            vec_then_clause);
> +       if (len)
> +         new_stmt = gimple_build_call_internal
> +             (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
> +              vec_then_clause, len, bias);
> +       else
> +         new_stmt = gimple_build_call_internal
> +             (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
> +              vec_then_clause);
>         gimple_call_set_lhs (new_stmt, lhs);
>         SSA_NAME_DEF_STMT (lhs) = new_stmt;
>         if (old_stmt == gsi_stmt (*gsi))
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to