Hi! For a patch I'm going to post next, I need to be able to tweak prefer-vector-width= for simd clones. The thing is, in declare simd clones it makes no sense to restrict vectorization to a subset of the vector sizes the selected ISA is capable of handling: the choice of vectorization factor and the ABI for it is made during vectorization of the caller, and the simd clones already get their arguments in xmm, ymm or zmm registers, so it makes no sense e.g. for the zmm variant to extract from the zmm registers, perform all computations in xmm only and finally store into zmm again. For that, prefer-vector-width= really needs to be something that can be specified in the target attribute that simd clones use. I think it can be useful for other use cases too, to override this tuning, since some function might target a different CPU than the rest of the code.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2020-01-03 Jakub Jelinek <ja...@redhat.com> PR target/93089 * config/i386/i386.opt (x_prefer_vector_width_type): Remove TargetSave entry. (mprefer-vector-width=): Add Save. * config/i386/i386-options.c (ix86_target_string): Add PVW argument, print -mprefer-vector-width= if non-zero. Fix up -mfpmath= comment. (ix86_debug_options, ix86_function_specific_print): Adjust ix86_target_string callers. (ix86_valid_target_attribute_inner_p): Handle prefer-vector-width=. (ix86_valid_target_attribute_tree): Likewise. * config/i386/i386-options.h (ix86_target_string): Add PVW argument. * config/i386/i386-expand.c (ix86_expand_builtin): Adjust ix86_target_string caller. * gcc.target/i386/pr93089-1.c: New test. --- gcc/config/i386/i386.opt.jj 2020-01-01 12:16:10.228273887 +0100 +++ gcc/config/i386/i386.opt 2020-01-02 14:34:47.497328338 +0100 @@ -182,10 +182,6 @@ int x_ix86_tune_no_default TargetSave enum ix86_veclibabi x_ix86_veclibabi_type -;; -mprefer-vector-width= -TargetSave -enum prefer_vector_width x_prefer_vector_width_type - ;; x86 options m128bit-long-double Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) Save @@ -595,7 +591,7 @@ Target Alias(mprefer-vector-width=, 128, Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer. mprefer-vector-width= -Target Report RejectNegative Joined Var(prefer_vector_width_type) Enum(prefer_vector_width) Init(PVW_NONE) +Target Report RejectNegative Joined Var(prefer_vector_width_type) Enum(prefer_vector_width) Init(PVW_NONE) Save Use given register vector width instructions instead of maximum register width in the auto-vectorizer. 
Enum --- gcc/config/i386/i386-options.c.jj 2020-01-01 12:16:13.329227004 +0100 +++ gcc/config/i386/i386-options.c 2020-01-02 14:00:40.299515822 +0100 @@ -339,7 +339,9 @@ char * ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, int flags, int flags2, const char *arch, const char *tune, - enum fpmath_unit fpmath, bool add_nl_p, bool add_abi_p) + enum fpmath_unit fpmath, + enum prefer_vector_width pvw, + bool add_nl_p, bool add_abi_p) { /* Flag options. */ static struct ix86_target_opts flag_opts[] = @@ -493,7 +495,7 @@ ix86_target_string (HOST_WIDE_INT isa, H sprintf (flags2_other, "(other flags2: %#x)", flags2); } - /* Add -fpmath= option. */ + /* Add -mfpmath= option. */ if (fpmath) { opts[num][0] = "-mfpmath="; @@ -516,6 +518,29 @@ ix86_target_string (HOST_WIDE_INT isa, H } } + /* Add -mprefer-vector-width= option. */ + if (pvw) + { + opts[num][0] = "-mprefer-vector-width="; + switch ((int) pvw) + { + case PVW_AVX128: + opts[num++][1] = "128"; + break; + + case PVW_AVX256: + opts[num++][1] = "256"; + break; + + case PVW_AVX512: + opts[num++][1] = "512"; + break; + + default: + gcc_unreachable (); + } + } + /* Any options? 
*/ if (num == 0) return NULL; @@ -579,8 +604,9 @@ ix86_debug_options (void) { char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2, target_flags, ix86_target_flags, - ix86_arch_string,ix86_tune_string, - ix86_fpmath, true, true); + ix86_arch_string, ix86_tune_string, + ix86_fpmath, prefer_vector_width_type, + true, true); if (opts) { @@ -847,7 +873,8 @@ ix86_function_specific_print (FILE *file char *target_string = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2, ptr->x_target_flags, ptr->x_ix86_target_flags, - NULL, NULL, ptr->x_ix86_fpmath, false, true); + NULL, NULL, ptr->x_ix86_fpmath, + ptr->x_prefer_vector_width_type, false, true); gcc_assert (ptr->arch < PROCESSOR_max); fprintf (file, "%*sarch = %d (%s)\n", @@ -992,6 +1019,7 @@ ix86_valid_target_attribute_inner_p (tre /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), + IX86_ATTR_ENUM ("prefer-vector-width=", OPT_mprefer_vector_width_), /* string options */ IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), @@ -1213,6 +1241,7 @@ ix86_valid_target_attribute_tree (tree f const char *orig_arch_string = opts->x_ix86_arch_string; const char *orig_tune_string = opts->x_ix86_tune_string; enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath; + enum prefer_vector_width orig_pvw_set = opts_set->x_prefer_vector_width_type; int orig_tune_defaulted = ix86_tune_defaulted; int orig_arch_specified = ix86_arch_specified; char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL }; @@ -1238,7 +1267,8 @@ ix86_valid_target_attribute_tree (tree f || opts->x_target_flags != def->x_target_flags || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] - || enum_opts_set.x_ix86_fpmath) + || enum_opts_set.x_ix86_fpmath + || enum_opts_set.x_prefer_vector_width_type) { /* If we are using the default tune= or arch=, undo the string assigned, and use the default. 
*/ @@ -1257,6 +1287,8 @@ ix86_valid_target_attribute_tree (tree f /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ if (enum_opts_set.x_ix86_fpmath) opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; + if (enum_opts_set.x_prefer_vector_width_type) + opts_set->x_prefer_vector_width_type = (enum prefer_vector_width) 1; /* Do any overrides, such as arch=xxx, or tune=xxx support. */ bool r = ix86_option_override_internal (false, opts, opts_set); @@ -1276,6 +1308,7 @@ ix86_valid_target_attribute_tree (tree f opts->x_ix86_arch_string = orig_arch_string; opts->x_ix86_tune_string = orig_tune_string; opts_set->x_ix86_fpmath = orig_fpmath_set; + opts_set->x_prefer_vector_width_type = orig_pvw_set; release_options_strings (option_strings); } --- gcc/config/i386/i386-options.h.jj 2020-01-01 12:16:13.673221803 +0100 +++ gcc/config/i386/i386-options.h 2020-01-02 14:00:40.298515838 +0100 @@ -25,7 +25,8 @@ extern int ix86_omp_device_kind_arch_isa char *ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, int flags, int flags2, const char *arch, const char *tune, - enum fpmath_unit fpmath, bool add_nl_p, + enum fpmath_unit fpmath, + enum prefer_vector_width pvw, bool add_nl_p, bool add_abi_p); extern enum attr_cpu ix86_schedule; --- gcc/config/i386/i386-expand.c.jj 2020-01-01 12:16:10.570268716 +0100 +++ gcc/config/i386/i386-expand.c 2020-01-02 14:00:40.297515853 +0100 @@ -11009,7 +11009,9 @@ ix86_expand_builtin (tree exp, rtx targe else bisa |= OPTION_MASK_ABI_64; char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL, - (enum fpmath_unit) 0, false, add_abi_p); + (enum fpmath_unit) 0, + (enum prefer_vector_width) 0, + false, add_abi_p); if (!opts) error ("%qE needs unknown isa option", fndecl); else --- gcc/testsuite/gcc.target/i386/pr93089-1.c.jj 2020-01-02 14:49:05.615271117 +0100 +++ gcc/testsuite/gcc.target/i386/pr93089-1.c 2020-01-02 14:49:19.641058972 +0100 @@ -0,0 +1,24 @@ +/* PR target/93089 */ +/* { dg-do compile } */ +/* { dg-options "-O2 
-ftree-vectorize -mavx512f -mtune=skylake-avx512" } */ +/* { dg-final { scan-assembler "vmulps\[^\n\r]*zmm" } } */ +/* { dg-final { scan-assembler-not "vaddps\[^\n\r]*zmm" } } */ +/* { dg-final { scan-assembler "vaddps\[^\n\r]*ymm" } } */ + +float a[16], b[16]; + +__attribute__((target ("prefer-vector-width=512"))) void +foo (void) +{ + int i; + for (i = 0; i < 16; ++i) + b[i] = 3.0f * a[i]; +} + +void +bar (void) +{ + int i; + for (i = 0; i < 16; ++i) + b[i] = 3.0f + a[i]; +} Jakub