I have committed the attached patch. Uros.
On Tue, Nov 21, 2017 at 6:18 PM, Shalnov, Sergey <sergey.shal...@intel.com> wrote: > Uros, > Yes, please. Thank you for your proposals and comments. > Please commit as you proposed. > Sergey > > -----Original Message----- > From: Uros Bizjak [mailto:ubiz...@gmail.com] > Sent: Tuesday, November 21, 2017 6:13 PM > To: Shalnov, Sergey <sergey.shal...@intel.com> > Cc: gcc-patches@gcc.gnu.org; kirill.yuk...@gmail.com; Koval, Julia > <julia.ko...@intel.com>; Senkevich, Andrew <andrew.senkev...@intel.com>; > Peryt, Sebastian <sebastian.pe...@intel.com>; Ivchenko, Alexander > <alexander.ivche...@intel.com>; Joseph Myers <jos...@codesourcery.com> > Subject: Re: [PATCH, i386] Refactor -mprefer-avx[128|256] options into common > -mprefer-vector-width=[none|128|256|512] > > On Tue, Nov 21, 2017 at 4:50 PM, Shalnov, Sergey <sergey.shal...@intel.com> > wrote: >> Uros, >> I made a new patch with all comments addressed as proposed. >> 1. old option -mprefer-avx128 is Alias(mprefer-vector-width=, 128, >> none) 2. Simplified default initialization (as Bernhard proposed) 3. >> Fixed documentation (proposed by Sandra) 4. Several tests are changed >> to use new style of the option but many are left with -mprefer-avx128 >> (one test with new style -mprefer-vector-width=128) >> >> >> 2017-11-21 Sergey Shalnov <sergey.shal...@intel.com> >> >> gcc/ >> * config/i386/i386-opts.h (enum prefer_vector_width): Added new enum >> for the new option -mprefer-vector-width=[none|128|256|512]. >> * config/i386/i386.c (ix86_target_string): remove old style options >> -mprefer-avx256 and make -mprefer-avx128 as alias. >> (ix86_option_override_internal): Apply defaults for the >> -mprefer-vector-width=[128|256] option. >> * config/i386/i386.h (TARGET_PREFER_AVX128, TARGET_PREFER_AVX256): >> Implement macros to work with -mprefer-vector-width=. >> * config/i386/i386.opt: Implemented option >> -mprefer-vector-width=[none|128|256|512]. 
>> * doc/invoke.texi: Documentation for >> -mprefer-vector-width=[none|128|256|512]. >> >> gcc/testsuite/ >> * g++.dg/ext/pr57362.C (__attribute__): Apply new option syntax. >> * g++.dg/torture/pr81249.C: Ditto. >> * gcc.target/i386/avx512f-constant-float-return.c: Ditto. >> * gcc.target/i386/avx512f-prefer.c: Ditto. >> * gcc.target/i386/pr82460-2.c: Ditto. >> >> Please merge this patch if you think it is acceptable. >> Thank you >> Sergey > > mprefer-avx128 > -Target Report Mask(PREFER_AVX128) Save > -Use 128-bit AVX instructions instead of 256-bit AVX instructions in the > auto-vectorizer. > +Target Undocumented Alias(mprefer-vector-width=, 128, none) > > For compatibility, I'd rather leave this option documented with: > > +Target Alias(mprefer-vector-width=, 128, 256) > > This would mean that in addition to -mprefer-avx128 switching to 128-bit AVX, > -mno-prefer-avx128 would switch to 256-bit AVX, as documented for the option. > > The patch is OK, and if you agree, I can commit the patch with the above > change. > > Thanks, > Uros.
Index: config/i386/i386-opts.h =================================================================== --- config/i386/i386-opts.h (revision 255016) +++ config/i386/i386-opts.h (working copy) @@ -99,4 +99,11 @@ enum stack_protector_guard { SSP_GLOBAL /* global canary */ }; +enum prefer_vector_width { + PVW_NONE, + PVW_AVX128, + PVW_AVX256, + PVW_AVX512 +}; + #endif Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 255016) +++ config/i386/i386.c (working copy) @@ -2847,7 +2847,6 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_I { "-mstv", MASK_STV }, { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD }, { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE }, - { "-mprefer-avx128", MASK_PREFER_AVX128 }, { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES } }; @@ -2854,8 +2853,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_I /* Additional flag options. */ static struct ix86_target_opts flag2_opts[] = { - { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY }, - { "-mprefer-avx256", OPTION_MASK_PREFER_AVX256 }, + { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY } }; const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts) @@ -4686,16 +4684,18 @@ ix86_option_override_internal (bool main_args_p, if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; + /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */ if (TARGET_AVX128_OPTIMAL - && !(opts_set->x_target_flags & MASK_PREFER_AVX128)) - opts->x_target_flags |= MASK_PREFER_AVX128; - /* Use 256-bit AVX instructions instead of 512-bit AVX instructions + && (opts_set->x_prefer_vector_width_type == PVW_NONE)) + opts->x_prefer_vector_width_type = PVW_AVX128; + + /* Use 256-bit AVX instruction generation in the auto-vectorizer. 
*/ if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL] - && !(opts_set->x_ix86_target_flags & OPTION_MASK_PREFER_AVX256)) - opts->x_ix86_target_flags |= OPTION_MASK_PREFER_AVX256; + && (opts_set->x_prefer_vector_width_type == PVW_NONE)) + opts->x_prefer_vector_width_type = PVW_AVX256; if (opts->x_ix86_recip_name) { Index: config/i386/i386.h =================================================================== --- config/i386/i386.h (revision 255016) +++ config/i386/i386.h (working copy) @@ -2678,6 +2678,11 @@ extern void debug_dispatch_window (int); #define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0) #define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0) +/* Use 128-bit AVX instructions in the auto-vectorizer. */ +#define TARGET_PREFER_AVX128 (prefer_vector_width_type == PVW_AVX128) +/* Use 256-bit AVX instructions in the auto-vectorizer. */ +#define TARGET_PREFER_AVX256 (prefer_vector_width_type == PVW_AVX256) + #define IX86_HLE_ACQUIRE (1 << 16) #define IX86_HLE_RELEASE (1 << 17) Index: config/i386/i386.opt =================================================================== --- config/i386/i386.opt (revision 255016) +++ config/i386/i386.opt (working copy) @@ -182,6 +182,10 @@ int x_ix86_tune_no_default TargetSave enum ix86_veclibabi x_ix86_veclibabi_type +;; -mprefer-vector-width= +TargetSave +enum prefer_vector_width x_prefer_vector_width_type + ;; x86 options m128bit-long-double Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) Save @@ -588,13 +592,29 @@ Do dispatch scheduling if processor is bdver1, bdv or znver1 and Haifa scheduling is selected. mprefer-avx128 -Target Report Mask(PREFER_AVX128) Save +Target Alias(mprefer-vector-width=, 128, 256) Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer. -mprefer-avx256 -Target Report Mask(PREFER_AVX256) Var(ix86_target_flags) Save -Use 256-bit AVX instructions instead of 512-bit AVX instructions in the auto-vectorizer. 
+mprefer-vector-width= +Target Report RejectNegative Joined Var(prefer_vector_width_type) Enum(prefer_vector_width) Init(PVW_NONE) +Use given register vector width instructions instead of maximum register width in the auto-vectorizer. +Enum +Name(prefer_vector_width) Type(enum prefer_vector_width) +Known preferred register vector length (to use with the -mprefer-vector-width= option) + +EnumValue +Enum(prefer_vector_width) String(none) Value(PVW_NONE) + +EnumValue +Enum(prefer_vector_width) String(128) Value(PVW_AVX128) + +EnumValue +Enum(prefer_vector_width) String(256) Value(PVW_AVX256) + +EnumValue +Enum(prefer_vector_width) String(512) Value(PVW_AVX512) + ;; ISA support m32 Index: doc/invoke.texi =================================================================== --- doc/invoke.texi (revision 255016) +++ doc/invoke.texi (working copy) @@ -1196,7 +1196,7 @@ See RS/6000 and PowerPC Options. -mincoming-stack-boundary=@var{num} @gol -mcld -mcx16 -msahf -mmovbe -mcrc32 @gol -mrecip -mrecip=@var{opt} @gol --mvzeroupper -mprefer-avx128 -mprefer-avx256 @gol +-mvzeroupper -mprefer-avx128 -mprefer-vector-width=@var{opt} @gol -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol -mavx2 -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl @gol -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi -msha -maes @gol @@ -26093,11 +26093,25 @@ intrinsics. This option instructs GCC to use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer. -@item -mprefer-avx256 -@opindex mprefer-avx256 -This option instructs GCC to use 256-bit AVX instructions instead of -512-bit AVX instructions in the auto-vectorizer. +@item -mprefer-vector-width=@var{opt} +@opindex mprefer-vector-width +This option instructs GCC to use @var{opt}-bit vector width in instructions +instead of default on the selected platform. +@table @samp +@item none +No extra limitations applied to GCC other than defined by the selected platform. 
+ +@item 128 +Prefer 128-bit vector width for instructions. + +@item 256 +Prefer 256-bit vector width for instructions. + +@item 512 +Prefer 512-bit vector width for instructions. +@end table + @item -mcx16 @opindex mcx16 This option enables GCC to generate @code{CMPXCHG16B} instructions in 64-bit Index: testsuite/g++.dg/ext/pr57362.C =================================================================== --- testsuite/g++.dg/ext/pr57362.C (revision 255016) +++ testsuite/g++.dg/ext/pr57362.C (working copy) @@ -81,8 +81,10 @@ __attribute__((target("dispatch-scheduler"))) int foo(void) { return 1; } __attribute__((target("prefer-avx128"))) int foo(void) { return 1; } -__attribute__((target("prefer-avx256"))) +__attribute__((target("prefer-vector-width=128"))) int foo(void) { return 1; } +__attribute__((target("prefer-vector-width=256"))) +int foo(void) { return 1; } __attribute__((target("32"))) int foo(void) { return 1; } __attribute__((target("64"))) Index: testsuite/gcc.target/i386/avx512f-constant-float-return.c =================================================================== --- testsuite/gcc.target/i386/avx512f-constant-float-return.c (revision 255016) +++ testsuite/gcc.target/i386/avx512f-constant-float-return.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=skylake-avx512 -mprefer-avx256" } */ +/* { dg-options "-O3 -march=skylake-avx512 -mprefer-vector-width=256" } */ /* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ float Index: testsuite/gcc.target/i386/avx512f-prefer.c =================================================================== --- testsuite/gcc.target/i386/avx512f-prefer.c (revision 255016) +++ testsuite/gcc.target/i386/avx512f-prefer.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=skylake-avx512 -mprefer-avx256" } */ +/* { dg-options "-O3 -march=skylake-avx512 -mprefer-vector-width=256" } */ /* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ /* { dg-final { 
scan-assembler "vmulpd" } } */ Index: testsuite/gcc.target/i386/pr82460-2.c =================================================================== --- testsuite/gcc.target/i386/pr82460-2.c (revision 255016) +++ testsuite/gcc.target/i386/pr82460-2.c (working copy) @@ -1,6 +1,6 @@ /* PR target/82460 */ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -mavx512vbmi -mno-prefer-avx256" } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512vbmi -mprefer-vector-width=none" } */ /* We want to reuse the permutation mask in the loop, so use vpermt2b rather than vpermi2b. */ /* { dg-final { scan-assembler-not {\mvpermi2b\M} } } */