https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49363

--- Comment #23 from vincenzo Innocente <vincenzo.innocente at cern dot ch> ---
Which syntax?
I want to reuse the same code for the various architectures and let gcc deal
with the vectorization details.
The best I have managed to do to share code is something like this:

namespace {
// Shared kernel called by the per-architecture sum1 wrappers.
// Accumulates z[k] + x[k]*y[k] over the first 1024 elements of the three
// input arrays (each must hold at least 1024 floats) and returns the total.
inline float _sum0(float const *x, float const *y, float const *z) {
  float acc = 0;
  for (int k = 0; k < 1024; ++k) {
    acc += z[k] + x[k] * y[k];
  }
  return acc;
}
}


// Version of sum1 carrying the target attribute "arch=haswell".
// The body only forwards its three array arguments to the shared _sum0
// helper and returns its result.
float  __attribute__ ((__target__ ("arch=haswell")))
sum1(float const *  x,
     float const *  y, float const *  z) {
  return _sum0(x,y,z);
}

// Version of sum1 carrying the target attribute "arch=nehalem".
// Same signature and body as the haswell version: it forwards to the shared
// _sum0 helper, so only the target attribute differs between the two.
float  __attribute__ ((__target__ ("arch=nehalem")))
sum1(float const *  x,
     float const *  y, float const *  z) {
  return _sum0(x,y,z);
}

//----------

This, for instance, does not work (it produces code only for haswell):

// Attempt to request both architectures in a single attribute list on one
// definition. Per this report, GCC generates code only for haswell here.
// The body sums z[i] + x[i]*y[i] over the first 1024 elements.
float  __attribute__ ( (__target__("arch=nehalem"), __target__("arch=haswell"))
)
sum0(float const *  x,
      float const *  y, float const *  z) {
 float sum=0;
 for (int i=0; i!=1024; ++i)
   sum += z[i]+x[i]*y[i];
 return sum;
}

Reply via email to