On Fri, Jan 3, 2020 at 9:31 AM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > For a patch I'm going to post next I need to be able to tweak > prefer-vector-width= for simd clones. The thing is, in the declare simd > clones it makes no sense to restrict to a subset of the vector sizes the > selected ISA is capable of handling: the choice of vectorization factor > and ABI is made during vectorization of the caller, and the simd clones > already get their arguments in xmm, ymm or zmm registers, so it makes e.g. > no sense for the zmm variant to extract from the zmm registers, perform > all computations in xmm only and finally store into zmm again. > For that, prefer-vector-width= really needs to be something that can be > specified in the target attribute that simd clones use; I think it can be useful > for other use cases too, to override this tuning, as some function might target > a different CPU than the rest of the code.
Yes, I agree with the above. > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2020-01-03 Jakub Jelinek <ja...@redhat.com> > > PR target/93089 > * config/i386/i386.opt (x_prefer_vector_width_type): Remove TargetSave > entry. > (mprefer-vector-width=): Add Save. > * config/i386/i386-options.c (ix86_target_string): Add PVW argument, > print > -mprefer-vector-width= if non-zero. Fix up -mfpmath= comment. > (ix86_debug_options, ix86_function_specific_print): Adjust > ix86_target_string callers. > (ix86_valid_target_attribute_inner_p): Handle prefer-vector-width=. > (ix86_valid_target_attribute_tree): Likewise. > * config/i386/i386-options.h (ix86_target_string): Add PVW argument. > * config/i386/i386-expand.c (ix86_expand_builtin): Adjust > ix86_target_string caller. > > * gcc.target/i386/pr93089-1.c: New test. LGTM. Thanks, Uros. > --- gcc/config/i386/i386.opt.jj 2020-01-01 12:16:10.228273887 +0100 > +++ gcc/config/i386/i386.opt 2020-01-02 14:34:47.497328338 +0100 > @@ -182,10 +182,6 @@ int x_ix86_tune_no_default > TargetSave > enum ix86_veclibabi x_ix86_veclibabi_type > > -;; -mprefer-vector-width= > -TargetSave > -enum prefer_vector_width x_prefer_vector_width_type > - > ;; x86 options > m128bit-long-double > Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) Save > @@ -595,7 +591,7 @@ Target Alias(mprefer-vector-width=, 128, > Use 128-bit AVX instructions instead of 256-bit AVX instructions in the > auto-vectorizer. > > mprefer-vector-width= > -Target Report RejectNegative Joined Var(prefer_vector_width_type) > Enum(prefer_vector_width) Init(PVW_NONE) > +Target Report RejectNegative Joined Var(prefer_vector_width_type) > Enum(prefer_vector_width) Init(PVW_NONE) Save > Use given register vector width instructions instead of maximum register > width in the auto-vectorizer. 
> > Enum > --- gcc/config/i386/i386-options.c.jj 2020-01-01 12:16:13.329227004 +0100 > +++ gcc/config/i386/i386-options.c 2020-01-02 14:00:40.299515822 +0100 > @@ -339,7 +339,9 @@ char * > ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, > int flags, int flags2, > const char *arch, const char *tune, > - enum fpmath_unit fpmath, bool add_nl_p, bool add_abi_p) > + enum fpmath_unit fpmath, > + enum prefer_vector_width pvw, > + bool add_nl_p, bool add_abi_p) > { > /* Flag options. */ > static struct ix86_target_opts flag_opts[] = > @@ -493,7 +495,7 @@ ix86_target_string (HOST_WIDE_INT isa, H > sprintf (flags2_other, "(other flags2: %#x)", flags2); > } > > - /* Add -fpmath= option. */ > + /* Add -mfpmath= option. */ > if (fpmath) > { > opts[num][0] = "-mfpmath="; > @@ -516,6 +518,29 @@ ix86_target_string (HOST_WIDE_INT isa, H > } > } > > + /* Add -mprefer-vector-width= option. */ > + if (pvw) > + { > + opts[num][0] = "-mprefer-vector-width="; > + switch ((int) pvw) > + { > + case PVW_AVX128: > + opts[num++][1] = "128"; > + break; > + > + case PVW_AVX256: > + opts[num++][1] = "256"; > + break; > + > + case PVW_AVX512: > + opts[num++][1] = "512"; > + break; > + > + default: > + gcc_unreachable (); > + } > + } > + > /* Any options? 
*/ > if (num == 0) > return NULL; > @@ -579,8 +604,9 @@ ix86_debug_options (void) > { > char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2, > target_flags, ix86_target_flags, > - ix86_arch_string,ix86_tune_string, > - ix86_fpmath, true, true); > + ix86_arch_string, ix86_tune_string, > + ix86_fpmath, prefer_vector_width_type, > + true, true); > > if (opts) > { > @@ -847,7 +873,8 @@ ix86_function_specific_print (FILE *file > char *target_string > = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2, > ptr->x_target_flags, ptr->x_ix86_target_flags, > - NULL, NULL, ptr->x_ix86_fpmath, false, true); > + NULL, NULL, ptr->x_ix86_fpmath, > + ptr->x_prefer_vector_width_type, false, true); > > gcc_assert (ptr->arch < PROCESSOR_max); > fprintf (file, "%*sarch = %d (%s)\n", > @@ -992,6 +1019,7 @@ ix86_valid_target_attribute_inner_p (tre > > /* enum options */ > IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), > + IX86_ATTR_ENUM ("prefer-vector-width=", OPT_mprefer_vector_width_), > > /* string options */ > IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), > @@ -1213,6 +1241,7 @@ ix86_valid_target_attribute_tree (tree f > const char *orig_arch_string = opts->x_ix86_arch_string; > const char *orig_tune_string = opts->x_ix86_tune_string; > enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath; > + enum prefer_vector_width orig_pvw_set = > opts_set->x_prefer_vector_width_type; > int orig_tune_defaulted = ix86_tune_defaulted; > int orig_arch_specified = ix86_arch_specified; > char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL }; > @@ -1238,7 +1267,8 @@ ix86_valid_target_attribute_tree (tree f > || opts->x_target_flags != def->x_target_flags > || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] > || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] > - || enum_opts_set.x_ix86_fpmath) > + || enum_opts_set.x_ix86_fpmath > + || enum_opts_set.x_prefer_vector_width_type) > { > /* If we are using the default tune= or arch=, undo the string > assigned, > 
and use the default. */ > @@ -1257,6 +1287,8 @@ ix86_valid_target_attribute_tree (tree f > /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ > if (enum_opts_set.x_ix86_fpmath) > opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; > + if (enum_opts_set.x_prefer_vector_width_type) > + opts_set->x_prefer_vector_width_type = (enum prefer_vector_width) 1; > > /* Do any overrides, such as arch=xxx, or tune=xxx support. */ > bool r = ix86_option_override_internal (false, opts, opts_set); > @@ -1276,6 +1308,7 @@ ix86_valid_target_attribute_tree (tree f > opts->x_ix86_arch_string = orig_arch_string; > opts->x_ix86_tune_string = orig_tune_string; > opts_set->x_ix86_fpmath = orig_fpmath_set; > + opts_set->x_prefer_vector_width_type = orig_pvw_set; > > release_options_strings (option_strings); > } > --- gcc/config/i386/i386-options.h.jj 2020-01-01 12:16:13.673221803 +0100 > +++ gcc/config/i386/i386-options.h 2020-01-02 14:00:40.298515838 +0100 > @@ -25,7 +25,8 @@ extern int ix86_omp_device_kind_arch_isa > char *ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, > int flags, int flags2, > const char *arch, const char *tune, > - enum fpmath_unit fpmath, bool add_nl_p, > + enum fpmath_unit fpmath, > + enum prefer_vector_width pvw, bool add_nl_p, > bool add_abi_p); > > extern enum attr_cpu ix86_schedule; > --- gcc/config/i386/i386-expand.c.jj 2020-01-01 12:16:10.570268716 +0100 > +++ gcc/config/i386/i386-expand.c 2020-01-02 14:00:40.297515853 +0100 > @@ -11009,7 +11009,9 @@ ix86_expand_builtin (tree exp, rtx targe > else > bisa |= OPTION_MASK_ABI_64; > char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL, > - (enum fpmath_unit) 0, false, > add_abi_p); > + (enum fpmath_unit) 0, > + (enum prefer_vector_width) 0, > + false, add_abi_p); > if (!opts) > error ("%qE needs unknown isa option", fndecl); > else > --- gcc/testsuite/gcc.target/i386/pr93089-1.c.jj 2020-01-02 > 14:49:05.615271117 +0100 > +++ gcc/testsuite/gcc.target/i386/pr93089-1.c 2020-01-02 
14:49:19.641058972 > +0100 > @@ -0,0 +1,24 @@ > +/* PR target/93089 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -mavx512f -mtune=skylake-avx512" } */ > +/* { dg-final { scan-assembler "vmulps\[^\n\r]*zmm" } } */ > +/* { dg-final { scan-assembler-not "vaddps\[^\n\r]*zmm" } } */ > +/* { dg-final { scan-assembler "vaddps\[^\n\r]*ymm" } } */ > + > +float a[16], b[16]; > + > +__attribute__((target ("prefer-vector-width=512"))) void > +foo (void) > +{ > + int i; > + for (i = 0; i < 16; ++i) > + b[i] = 3.0f * a[i]; > +} > + > +void > +bar (void) > +{ > + int i; > + for (i = 0; i < 16; ++i) > + b[i] = 3.0f + a[i]; > +} > > Jakub >