On Mon, Mar 19, 2018 at 8:35 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > As mentioned in the PR, this is another case where we've run into too many > separate feature bits issues on x86. unfortunately in this case it affects > ABI. __cpu_model variable contains just 32 bits for the features, and we > now need 4 extra features that won't fit in there. The current code just > invokes in the compiler as well as runtime initialization. > > While on {x86_64,i?86}-linux __cpu_model is exported from libgcc_s.so.1 > only for backwards compatibility and new code is always using a hidden > copy in each DSO or binary from libgcc.a, that is not the case on other > x86 targets. For a Linux only solution, we could just grow up __cpu_model > except for the one in libgcc_s.so.1 (i.e. #ifdef SHARED) and as long as GCC > 8+ libgcc.a would be used for linking of __builtin_cpu_supports code from > both old and new gcc things would just work. For other targets, as > __cpu_model is exported data object, binaries could have copy relocations > against it, so we really can't change the size. > > As the preferred way is to put it into libgcc.a only, this patch just puts > the overflow features into a new separate variable which is not put into > libgcc_s.so.1 at all, which will force use of the libgcc.a copy for > at least __builtin_cpu_supports calls that ask for the new features. > > Bootstrapped/regtested on x86_64-linux and i686-linux (on Haswell-E, don't > have access to any of the CPUs with the new features), ok for trunk? > > 2018-03-19 Jakub Jelinek <ja...@redhat.com> > > PR target/84945 > * config/i386/i386.c (fold_builtin_cpu): For features above 31 > use __cpu_features2 variable instead of __cpu_model.__cpu_features[0]. > Use 1U instead of 1. Formatting fixes. > > * gcc.target/i386/pr84945.c: New test. > > * config/i386/cpuinfo.h (__cpu_features2): Declare. > * config/i386/cpuinfo.c (__cpu_features2): New variable for > ifndef SHARED only. > (set_feature): Define. > (get_available_features): Use set_feature macro. Set __cpu_features2 > to the second word of features ifndef SHARED.
LGTM, with a couple of nits. Thanks, Uros. > --- gcc/config/i386/i386.c.jj 2018-03-17 12:11:39.954436461 +0100 > +++ gcc/config/i386/i386.c 2018-03-19 16:36:55.565231809 +0100 > @@ -33265,8 +33264,8 @@ fold_builtin_cpu (tree fndecl, tree *arg > } > > /* Get the appropriate field in __cpu_model. */ > - ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, > - field, NULL_TREE); > + ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, > + field, NULL_TREE); > > /* Check the value. */ > final = build2 (EQ_EXPR, unsigned_type_node, ref, > @@ -33296,20 +33295,34 @@ fold_builtin_cpu (tree fndecl, tree *arg > return integer_zero_node; > } > > + if (isa_names_table[i].feature >= 32) > + { > + tree __cpu_features2_var = make_var_decl (unsigned_type_node, > + "__cpu_features2"); > + > + varpool_node::add (__cpu_features2_var); > + field_val = (1U << (isa_names_table[i].feature - 32)); > + /* Return __cpu_features2 & field_val */ > + final = build2 (BIT_AND_EXPR, unsigned_type_node, > + __cpu_features2_var, > + build_int_cstu (unsigned_type_node, field_val)); > + return build1 (CONVERT_EXPR, integer_type_node, final); > + } > + > field = TYPE_FIELDS (__processor_model_type); > /* Get the last field, which is __cpu_features. */ > while (DECL_CHAIN (field)) > field = DECL_CHAIN (field); > > /* Get the appropriate field: __cpu_model.__cpu_features */ > - ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, > - field, NULL_TREE); > + ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, > + field, NULL_TREE); > > /* Access the 0th element of __cpu_features array. */ > array_elt = build4 (ARRAY_REF, unsigned_type_node, ref, > integer_zero_node, NULL_TREE, NULL_TREE); > > - field_val = (1 << isa_names_table[i].feature); > + field_val = (1U << (isa_names_table[i].feature)); Parenthesis above are not needed. > /* Return __cpu_model.__cpu_features[0] & field_val */ > final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, > build_int_cstu (unsigned_type_node, field_val)); > --- gcc/testsuite/gcc.target/i386/pr84945.c.jj 2018-03-19 16:37:14.667228396 > +0100 > +++ gcc/testsuite/gcc.target/i386/pr84945.c 2018-03-19 16:36:42.057234231 > +0100 > @@ -0,0 +1,16 @@ > +/* PR target/84945 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +int > +main () > +{ > + /* AVX512_VNNI instructions are all EVEX encoded, so if > + __builtin_cpu_supports says avx512vnni is available and avx512f is not, > + this is a GCC bug. Ditto for AVX512_BITALG */ > + if (!__builtin_cpu_supports ("avx512f") > + && (__builtin_cpu_supports ("avx512vnni") > + || __builtin_cpu_supports ("avx512bitalg"))) > + __builtin_abort (); > + return 0; > +} > --- libgcc/config/i386/cpuinfo.c.jj 2018-03-15 09:10:20.870075051 +0100 > +++ libgcc/config/i386/cpuinfo.c 2018-03-19 16:13:25.059481079 +0100 > @@ -39,6 +39,13 @@ int __cpu_indicator_init (void) > > > struct __processor_model __cpu_model = { }; > +#ifndef SHARED > +/* We want to move away from __cpu_model in libgcc_s.so.1 and the > + size of __cpu_model is part of ABI. So, new features that don't > + fit into __cpu_model.__cpu_features[0] go into extra variables > + in libgcc.a only, preferrably hidden. */ > +unsigned int __cpu_features2; > +#endif > > > /* Get the specific type of AMD CPU. */ > @@ -231,78 +238,81 @@ get_available_features (unsigned int ecx > unsigned int ext_level; > > unsigned int features = 0; > + unsigned int features2 = 0; > > +#define set_feature(f) \ > + if (f < 32) features |= (1U << f); else features2 |= (1U << (f - 32)) Please add some vertical space here. > if (edx & bit_CMOV) > - features |= (1 << FEATURE_CMOV); > + set_feature (FEATURE_CMOV); > if (edx & bit_MMX) > - features |= (1 << FEATURE_MMX); > + set_feature (FEATURE_MMX); > if (edx & bit_SSE) > - features |= (1 << FEATURE_SSE); > + set_feature (FEATURE_SSE); > if (edx & bit_SSE2) > - features |= (1 << FEATURE_SSE2); > + set_feature (FEATURE_SSE2); > if (ecx & bit_POPCNT) > - features |= (1 << FEATURE_POPCNT); > + set_feature (FEATURE_POPCNT); > if (ecx & bit_AES) > - features |= (1 << FEATURE_AES); > + set_feature (FEATURE_AES); > if (ecx & bit_PCLMUL) > - features |= (1 << FEATURE_PCLMUL); > + set_feature (FEATURE_PCLMUL); > if (ecx & bit_SSE3) > - features |= (1 << FEATURE_SSE3); > + set_feature (FEATURE_SSE3); > if (ecx & bit_SSSE3) > - features |= (1 << FEATURE_SSSE3); > + set_feature (FEATURE_SSSE3); > if (ecx & bit_SSE4_1) > - features |= (1 << FEATURE_SSE4_1); > + set_feature (FEATURE_SSE4_1); > if (ecx & bit_SSE4_2) > - features |= (1 << FEATURE_SSE4_2); > + set_feature (FEATURE_SSE4_2); > if (ecx & bit_AVX) > - features |= (1 << FEATURE_AVX); > + set_feature (FEATURE_AVX); > if (ecx & bit_FMA) > - features |= (1 << FEATURE_FMA); > + set_feature (FEATURE_FMA); > > /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */ > if (max_cpuid_level >= 7) > { > __cpuid_count (7, 0, eax, ebx, ecx, edx); > if (ebx & bit_BMI) > - features |= (1 << FEATURE_BMI); > + set_feature (FEATURE_BMI); > if (ebx & bit_AVX2) > - features |= (1 << FEATURE_AVX2); > + set_feature (FEATURE_AVX2); > if (ebx & bit_BMI2) > - features |= (1 << FEATURE_BMI2); > + set_feature (FEATURE_BMI2); > if (ebx & bit_AVX512F) > - features |= (1 << FEATURE_AVX512F); > + set_feature (FEATURE_AVX512F); > if (ebx & bit_AVX512VL) > - features |= (1 << FEATURE_AVX512VL); > + set_feature (FEATURE_AVX512VL); > if (ebx & bit_AVX512BW) > - features |= (1 << FEATURE_AVX512BW); > + set_feature (FEATURE_AVX512BW); > if (ebx & bit_AVX512DQ) > - features |= (1 << FEATURE_AVX512DQ); > + set_feature (FEATURE_AVX512DQ); > if (ebx & bit_AVX512CD) > - features |= (1 << FEATURE_AVX512CD); > + set_feature (FEATURE_AVX512CD); > if (ebx & bit_AVX512PF) > - features |= (1 << FEATURE_AVX512PF); > + set_feature (FEATURE_AVX512PF); > if (ebx & bit_AVX512ER) > - features |= (1 << FEATURE_AVX512ER); > + set_feature (FEATURE_AVX512ER); > if (ebx & bit_AVX512IFMA) > - features |= (1 << FEATURE_AVX512IFMA); > + set_feature (FEATURE_AVX512IFMA); > if (ecx & bit_AVX512VBMI) > - features |= (1 << FEATURE_AVX512VBMI); > + set_feature (FEATURE_AVX512VBMI); > if (ecx & bit_AVX512VBMI2) > - features |= (1 << FEATURE_AVX512VBMI2); > + set_feature (FEATURE_AVX512VBMI2); > if (ecx & bit_GFNI) > - features |= (1 << FEATURE_GFNI); > + set_feature (FEATURE_GFNI); > if (ecx & bit_VPCLMULQDQ) > - features |= (1 << FEATURE_VPCLMULQDQ); > + set_feature (FEATURE_VPCLMULQDQ); > if (ecx & bit_AVX512VNNI) > - features |= (1 << FEATURE_AVX512VNNI); > + set_feature (FEATURE_AVX512VNNI); > if (ecx & bit_AVX512BITALG) > - features |= (1 << FEATURE_AVX512BITALG); > + set_feature (FEATURE_AVX512BITALG); > if (ecx & bit_AVX512VPOPCNTDQ) > - features |= (1 << FEATURE_AVX512VPOPCNTDQ); > + set_feature (FEATURE_AVX512VPOPCNTDQ); > if (edx & bit_AVX5124VNNIW) > - features |= (1 << FEATURE_AVX5124VNNIW); > + set_feature (FEATURE_AVX5124VNNIW); > if (edx & bit_AVX5124FMAPS) > - features |= (1 << FEATURE_AVX5124FMAPS); > + set_feature (FEATURE_AVX5124FMAPS); > } > > /* Check cpuid level of extended features. */ > @@ -313,14 +323,19 @@ get_available_features (unsigned int ecx > __cpuid (0x80000001, eax, ebx, ecx, edx); > > if (ecx & bit_SSE4a) > - features |= (1 << FEATURE_SSE4_A); > + set_feature (FEATURE_SSE4_A); > if (ecx & bit_FMA4) > - features |= (1 << FEATURE_FMA4); > + set_feature (FEATURE_FMA4); > if (ecx & bit_XOP) > - features |= (1 << FEATURE_XOP); > + set_feature (FEATURE_XOP); > } > > __cpu_model.__cpu_features[0] = features; > +#ifndef SHARED > + __cpu_features2 = features2; > +#else > + (void) features2; > +#endif > } > > /* A constructor function that is sets __cpu_model and __cpu_features with > --- libgcc/config/i386/cpuinfo.h.jj 2018-03-15 09:10:20.889075049 +0100 > +++ libgcc/config/i386/cpuinfo.h 2018-03-19 16:10:34.278510545 +0100 > @@ -124,3 +124,4 @@ extern struct __processor_model > unsigned int __cpu_subtype; > unsigned int __cpu_features[1]; > } __cpu_model; > +extern unsigned int __cpu_features2; > > Jakub