On Fri, Jan 3, 2020 at 9:31 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> For a patch I'm going to post next I need to be able to tweak
> prefer_vector_width= for simd clones (the thing is, in the declare simd
> clones it makes no sense to restrict to a subset of vector sizes the
> selected ISA is capable of handling, the choice what vectorization factor
> and ABI for it is done during vectorization of the caller, the simd clones
> already get their arguments in xmm or ymm or zmm registers and it makes e.g.
> no sense for the zmm variant to extract from the zmm registers, perform
> all computations in xmm only and finally store into zmm again).
> For that prefer-vector-width= really needs to be something that can be
> specified in target attribute that simd clones use; I think it can be useful
> for other use cases too, to override this tuning, some function might target
> a different CPU than the rest of the code.

Yes, I agree with the above.

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2020-01-03  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/93089
>         * config/i386/i386.opt (x_prefer_vector_width_type): Remove TargetSave
>         entry.
>         (mprefer-vector-width=): Add Save.
>         * config/i386/i386-options.c (ix86_target_string): Add PVW argument,
>         print -mprefer-vector-width= if non-zero.  Fix up -mfpmath= comment.
>         (ix86_debug_options, ix86_function_specific_print): Adjust
>         ix86_target_string callers.
>         (ix86_valid_target_attribute_inner_p): Handle prefer-vector-width=.
>         (ix86_valid_target_attribute_tree): Likewise.
>         * config/i386/i386-options.h (ix86_target_string): Add PVW argument.
>         * config/i386/i386-expand.c (ix86_expand_builtin): Adjust
>         ix86_target_string caller.
>
>         * gcc.target/i386/pr93089-1.c: New test.

LGTM.

Thanks,
Uros.

> --- gcc/config/i386/i386.opt.jj 2020-01-01 12:16:10.228273887 +0100
> +++ gcc/config/i386/i386.opt    2020-01-02 14:34:47.497328338 +0100
> @@ -182,10 +182,6 @@ int x_ix86_tune_no_default
>  TargetSave
>  enum ix86_veclibabi x_ix86_veclibabi_type
>
> -;; -mprefer-vector-width=
> -TargetSave
> -enum prefer_vector_width x_prefer_vector_width_type
> -
>  ;; x86 options
>  m128bit-long-double
>  Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) Save
> @@ -595,7 +591,7 @@ Target Alias(mprefer-vector-width=, 128,
>  Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer.
>
>  mprefer-vector-width=
> -Target Report RejectNegative Joined Var(prefer_vector_width_type) Enum(prefer_vector_width) Init(PVW_NONE)
> +Target Report RejectNegative Joined Var(prefer_vector_width_type) Enum(prefer_vector_width) Init(PVW_NONE) Save
>  Use given register vector width instructions instead of maximum register width in the auto-vectorizer.
>
>  Enum
> --- gcc/config/i386/i386-options.c.jj   2020-01-01 12:16:13.329227004 +0100
> +++ gcc/config/i386/i386-options.c      2020-01-02 14:00:40.299515822 +0100
> @@ -339,7 +339,9 @@ char *
>  ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
>                     int flags, int flags2,
>                     const char *arch, const char *tune,
> -                   enum fpmath_unit fpmath, bool add_nl_p, bool add_abi_p)
> +                   enum fpmath_unit fpmath,
> +                   enum prefer_vector_width pvw,
> +                   bool add_nl_p, bool add_abi_p)
>  {
>    /* Flag options.  */
>    static struct ix86_target_opts flag_opts[] =
> @@ -493,7 +495,7 @@ ix86_target_string (HOST_WIDE_INT isa, H
>        sprintf (flags2_other, "(other flags2: %#x)", flags2);
>      }
>
> -  /* Add -fpmath= option.  */
> +  /* Add -mfpmath= option.  */
>    if (fpmath)
>      {
>        opts[num][0] = "-mfpmath=";
> @@ -516,6 +518,29 @@ ix86_target_string (HOST_WIDE_INT isa, H
>         }
>      }
>
> +  /* Add -mprefer-vector-width= option.  */
> +  if (pvw)
> +    {
> +      opts[num][0] = "-mprefer-vector-width=";
> +      switch ((int) pvw)
> +       {
> +       case PVW_AVX128:
> +         opts[num++][1] = "128";
> +         break;
> +
> +       case PVW_AVX256:
> +         opts[num++][1] = "256";
> +         break;
> +
> +       case PVW_AVX512:
> +         opts[num++][1] = "512";
> +         break;
> +
> +       default:
> +         gcc_unreachable ();
> +       }
> +    }
> +
>    /* Any options?  */
>    if (num == 0)
>      return NULL;
> @@ -579,8 +604,9 @@ ix86_debug_options (void)
>  {
>    char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
>                                    target_flags, ix86_target_flags,
> -                                  ix86_arch_string,ix86_tune_string,
> -                                  ix86_fpmath, true, true);
> +                                  ix86_arch_string, ix86_tune_string,
> +                                  ix86_fpmath, prefer_vector_width_type,
> +                                  true, true);
>
>    if (opts)
>      {
> @@ -847,7 +873,8 @@ ix86_function_specific_print (FILE *file
>    char *target_string
>      = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
>                           ptr->x_target_flags, ptr->x_ix86_target_flags,
> -                         NULL, NULL, ptr->x_ix86_fpmath, false, true);
> +                         NULL, NULL, ptr->x_ix86_fpmath,
> +                         ptr->x_prefer_vector_width_type, false, true);
>
>    gcc_assert (ptr->arch < PROCESSOR_max);
>    fprintf (file, "%*sarch = %d (%s)\n",
> @@ -992,6 +1019,7 @@ ix86_valid_target_attribute_inner_p (tre
>
>      /* enum options */
>      IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
> +    IX86_ATTR_ENUM ("prefer-vector-width=", OPT_mprefer_vector_width_),
>
>      /* string options */
>      IX86_ATTR_STR ("arch=",    IX86_FUNCTION_SPECIFIC_ARCH),
> @@ -1213,6 +1241,7 @@ ix86_valid_target_attribute_tree (tree f
>    const char *orig_arch_string = opts->x_ix86_arch_string;
>    const char *orig_tune_string = opts->x_ix86_tune_string;
>    enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
> +  enum prefer_vector_width orig_pvw_set = opts_set->x_prefer_vector_width_type;
>    int orig_tune_defaulted = ix86_tune_defaulted;
>    int orig_arch_specified = ix86_arch_specified;
>    char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
> @@ -1238,7 +1267,8 @@ ix86_valid_target_attribute_tree (tree f
>        || opts->x_target_flags != def->x_target_flags
>        || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
>        || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
> -      || enum_opts_set.x_ix86_fpmath)
> +      || enum_opts_set.x_ix86_fpmath
> +      || enum_opts_set.x_prefer_vector_width_type)
>      {
>        /* If we are using the default tune= or arch=, undo the string assigned,
>          and use the default.  */
> @@ -1257,6 +1287,8 @@ ix86_valid_target_attribute_tree (tree f
>        /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
>        if (enum_opts_set.x_ix86_fpmath)
>         opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
> +      if (enum_opts_set.x_prefer_vector_width_type)
> +       opts_set->x_prefer_vector_width_type = (enum prefer_vector_width) 1;
>
>        /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
>        bool r = ix86_option_override_internal (false, opts, opts_set);
> @@ -1276,6 +1308,7 @@ ix86_valid_target_attribute_tree (tree f
>        opts->x_ix86_arch_string = orig_arch_string;
>        opts->x_ix86_tune_string = orig_tune_string;
>        opts_set->x_ix86_fpmath = orig_fpmath_set;
> +      opts_set->x_prefer_vector_width_type = orig_pvw_set;
>
>        release_options_strings (option_strings);
>      }
> --- gcc/config/i386/i386-options.h.jj   2020-01-01 12:16:13.673221803 +0100
> +++ gcc/config/i386/i386-options.h      2020-01-02 14:00:40.298515838 +0100
> @@ -25,7 +25,8 @@ extern int ix86_omp_device_kind_arch_isa
>  char *ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
>                           int flags, int flags2,
>                           const char *arch, const char *tune,
> -                         enum fpmath_unit fpmath, bool add_nl_p,
> +                         enum fpmath_unit fpmath,
> +                         enum prefer_vector_width pvw, bool add_nl_p,
>                           bool add_abi_p);
>
>  extern enum attr_cpu ix86_schedule;
> --- gcc/config/i386/i386-expand.c.jj    2020-01-01 12:16:10.570268716 +0100
> +++ gcc/config/i386/i386-expand.c       2020-01-02 14:00:40.297515853 +0100
> @@ -11009,7 +11009,9 @@ ix86_expand_builtin (tree exp, rtx targe
>        else
>         bisa |= OPTION_MASK_ABI_64;
>        char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
> -                                      (enum fpmath_unit) 0, false, add_abi_p);
> +                                      (enum fpmath_unit) 0,
> +                                      (enum prefer_vector_width) 0,
> +                                      false, add_abi_p);
>        if (!opts)
>         error ("%qE needs unknown isa option", fndecl);
>        else
> --- gcc/testsuite/gcc.target/i386/pr93089-1.c.jj        2020-01-02 14:49:05.615271117 +0100
> +++ gcc/testsuite/gcc.target/i386/pr93089-1.c   2020-01-02 14:49:19.641058972 +0100
> @@ -0,0 +1,24 @@
> +/* PR target/93089 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -mavx512f -mtune=skylake-avx512" } */
> +/* { dg-final { scan-assembler "vmulps\[^\n\r]*zmm" } } */
> +/* { dg-final { scan-assembler-not "vaddps\[^\n\r]*zmm" } } */
> +/* { dg-final { scan-assembler "vaddps\[^\n\r]*ymm" } } */
> +
> +float a[16], b[16];
> +
> +__attribute__((target ("prefer-vector-width=512"))) void
> +foo (void)
> +{
> +  int i;
> +  for (i = 0; i < 16; ++i)
> +    b[i] = 3.0f * a[i];
> +}
> +
> +void
> +bar (void)
> +{
> +  int i;
> +  for (i = 0; i < 16; ++i)
> +    b[i] = 3.0f + a[i];
> +}
>
>         Jakub
>

Reply via email to