https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49363
--- Comment #23 from vincenzo Innocente <vincenzo.innocente at cern dot ch> ---
Which syntax? I want to reuse the same code for the various architectures and let gcc deal with the vectorization details. The best I have managed to do to share code is something like this:

namespace {
  inline float _sum0(float const * x, float const * y, float const * z) {
    float sum=0;
    for (int i=0; i!=1024; ++i)
      sum += z[i]+x[i]*y[i];
    return sum;
  }
}

float __attribute__ ((__target__ ("arch=haswell")))
sum1(float const * x, float const * y, float const * z) {
  return _sum0(x,y,z);
}

float __attribute__ ((__target__ ("arch=nehalem")))
sum1(float const * x, float const * y, float const * z) {
  return _sum0(x,y,z);
}

//---------- this, for instance, does not work (it produces code only for haswell)

float __attribute__ ( (__target__("arch=nehalem"), __target__("arch=haswell")) )
sum0(float const * x, float const * y, float const * z) {
  float sum=0;
  for (int i=0; i!=1024; ++i)
    sum += z[i]+x[i]*y[i];
  return sum;
}