Hi! When -ffast-math is in effect, glibc annotates a couple of math functions with the simd attribute, so that vectorized versions with a vectorization factor of 4/8/16 can be used.
If one uses ::cos, ::cosf or std::cos(double), this works just fine, but it does not work with std::cos(float). This is because the libstdc++ headers call __builtin_cosf, and the builtin function doesn't have the simd attribute; only ::cosf does. Attached are two patches to improve this. The first one is a C/C++ FE change, which arranges that if the simd attribute is added to, say, ::cosf, then calls to __builtin_cosf will act as if __builtin_cosf also had the attribute. While other attributes aren't handled this way, there is perhaps a small precedent for such a change: if somebody uses typeof (cosf) cosf __asm ("foobar"); then calls to __builtin_cosf, if they expand into a library call, will call foobar, not cosf. The other patch is instead a libstdc++ change, which calls ::cosf etc. rather than __builtin_cosf etc. Both patches have been (separately) bootstrapped/regtested on x86_64-linux and i686-linux. Jakub
2017-08-07 Jakub Jelinek <ja...@redhat.com> PR libstdc++/81706 * tree.c (attribute_value_equal): Use omp_declare_simd_clauses_equal for comparison of OMP_CLAUSEs regardless of flag_openmp{,_simd}. * c-decl.c (merge_decls): Copy "omp declare simd" attributes from newdecl to corresponding __builtin_ if any. * decl.c (duplicate_decls): Copy "omp declare simd" attributes from newdecl to corresponding __builtin_ if any. * gcc.target/i386/pr81706.c: New test. * g++.dg/ext/pr81706.C: New test. --- gcc/tree.c.jj 2017-07-29 09:48:40.000000000 +0200 +++ gcc/tree.c 2017-08-04 12:06:35.636072718 +0200 @@ -5022,8 +5022,8 @@ attribute_value_equal (const_tree attr1, TREE_VALUE (attr2)) == 1); } - if ((flag_openmp || flag_openmp_simd) - && TREE_VALUE (attr1) && TREE_VALUE (attr2) + if (TREE_VALUE (attr1) + && TREE_VALUE (attr2) && TREE_CODE (TREE_VALUE (attr1)) == OMP_CLAUSE && TREE_CODE (TREE_VALUE (attr2)) == OMP_CLAUSE) return omp_declare_simd_clauses_equal (TREE_VALUE (attr1), --- gcc/c/c-decl.c.jj 2017-07-31 11:31:15.000000000 +0200 +++ gcc/c/c-decl.c 2017-08-04 12:39:48.113226134 +0200 @@ -2566,6 +2566,36 @@ merge_decls (tree newdecl, tree olddecl, set_builtin_decl_declared_p (fncode, true); break; } + + tree s = lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (newdecl)); + if (s) + { + tree b + = builtin_decl_explicit (DECL_FUNCTION_CODE (newdecl)); + if (b) + { + tree s2 = lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (b)); + while (s) + { + tree s3; + for (s3 = s2; s3; + s3 = lookup_attribute ("omp declare simd", + TREE_CHAIN (s3))) + if (attribute_value_equal (s, s3)) + break; + if (!s3) + { + s3 = copy_node (s); + TREE_CHAIN (s3) = DECL_ATTRIBUTES (b); + DECL_ATTRIBUTES (b) = s3; + } + s = lookup_attribute ("omp declare simd", + TREE_CHAIN (s)); + } + } + } } } else --- gcc/cp/decl.c.jj 2017-08-01 19:23:10.000000000 +0200 +++ gcc/cp/decl.c 2017-08-04 12:44:44.773780568 +0200 @@ -2456,6 +2456,35 @@ next_arg:; break; } } + + tree s = lookup_attribute 
("omp declare simd", + DECL_ATTRIBUTES (newdecl)); + if (s) + { + tree b = builtin_decl_explicit (DECL_FUNCTION_CODE (newdecl)); + if (b) + { + tree s2 = lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (b)); + while (s) + { + tree s3; + for (s3 = s2; s3; + s3 = lookup_attribute ("omp declare simd", + TREE_CHAIN (s3))) + if (attribute_value_equal (s, s3)) + break; + if (!s3) + { + s3 = copy_node (s); + TREE_CHAIN (s3) = DECL_ATTRIBUTES (b); + DECL_ATTRIBUTES (b) = s3; + } + s = lookup_attribute ("omp declare simd", + TREE_CHAIN (s)); + } + } + } } if (new_defines_function) /* If defining a function declared with other language --- gcc/testsuite/gcc.target/i386/pr81706.c.jj 2017-08-06 23:50:46.511337565 +0200 +++ gcc/testsuite/gcc.target/i386/pr81706.c 2017-08-06 23:50:35.000000000 +0200 @@ -0,0 +1,32 @@ +/* PR libstdc++/81706 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx2 -mno-avx512f" } */ +/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } } */ +/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_sin" } } */ + +#ifdef __cplusplus +extern "C" { +#endif +extern double cos (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +extern double sin (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +#ifdef __cplusplus +} +#endif +double p[1024] = { 1.0 }; +double q[1024] = { 1.0 }; + +void +foo (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = cos (q[i]); +} + +void +bar (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = __builtin_sin (q[i]); +} --- gcc/testsuite/g++.dg/ext/pr81706.C.jj 2017-08-06 23:51:09.318065575 +0200 +++ gcc/testsuite/g++.dg/ext/pr81706.C 2017-08-06 23:51:38.577716630 +0200 @@ -0,0 +1,32 @@ +// PR libstdc++/81706 +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-options "-O3 -mavx2 -mno-avx512f" } +// { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } } +// { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_sin" } } + +#ifdef __cplusplus +extern "C" { +#endif +extern 
double cos (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +extern double sin (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +#ifdef __cplusplus +} +#endif +double p[1024] = { 1.0 }; +double q[1024] = { 1.0 }; + +void +foo (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = cos (q[i]); +} + +void +bar (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = __builtin_sin (q[i]); +}
2017-08-07 Jakub Jelinek <ja...@redhat.com> PR libstdc++/81706 * include/c_global/cmath (std::cos, std::exp, std::log, std::pow, std::sin): Call ::FNf instead of __builtin_FNf in float overloads. --- libstdc++-v3/include/c_global/cmath.jj 2017-07-24 10:57:58.000000000 +0200 +++ libstdc++-v3/include/c_global/cmath 2017-08-04 10:16:59.484637951 +0200 @@ -182,7 +182,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO inline _GLIBCXX_CONSTEXPR float cos(float __x) - { return __builtin_cosf(__x); } + { return ::cosf(__x); } inline _GLIBCXX_CONSTEXPR long double cos(long double __x) @@ -220,7 +220,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO inline _GLIBCXX_CONSTEXPR float exp(float __x) - { return __builtin_expf(__x); } + { return ::expf(__x); } inline _GLIBCXX_CONSTEXPR long double exp(long double __x) @@ -336,7 +336,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO inline _GLIBCXX_CONSTEXPR float log(float __x) - { return __builtin_logf(__x); } + { return ::logf(__x); } inline _GLIBCXX_CONSTEXPR long double log(long double __x) @@ -386,7 +386,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO inline _GLIBCXX_CONSTEXPR float pow(float __x, float __y) - { return __builtin_powf(__x, __y); } + { return ::powf(__x, __y); } inline _GLIBCXX_CONSTEXPR long double pow(long double __x, long double __y) @@ -423,7 +423,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO inline _GLIBCXX_CONSTEXPR float sin(float __x) - { return __builtin_sinf(__x); } + { return ::sinf(__x); } inline _GLIBCXX_CONSTEXPR long double sin(long double __x)