On Wed, 15 Jan 2025 at 12:10, Jonathan Wakely <[email protected]> wrote:
>
> On Fri, 13 Dec 2024 at 14:01, Jan Hubicka <[email protected]> wrote:
> >
> > Hi,
> > this patch improves code generation on string constructors. We currently
> > have
> > _M_construct which takes as a parameter two iterators (begin/end pointers to
> > other string) and produces new string. This patch adds special case of
> > constructor where instead of begining/end pointers we readily know the
> > string
> > size and also special case when we know that source is 0 terminated. This
> > happens commonly when producing stirng copies. Moreover currently ipa-prop
> > is
> > not able to propagate information that beg-end is known constant (copied
> > string
> > size) which makes it impossible for inliner to spot the common case where
> > string size is known to be shorter than 15 bytes and fits in local buffer.
> >
> > Finally I made new constructor inline. Because it is explicitely
> > instantiated
> > without C++20 constexpr we do not produce implicit instantiation (as
> > required
> > by standard) which prevents inlining, ipa-modref and any other IPA analysis
> > to
> > happen. I think we need to make many of the other functions inline, since
> > optimization accross string manipulation is quite important. There is
> > PR94960
> > to track this issue.
> >
> > Bootstrapped/regtested x86_64-linux, OK?
> >
> > libstdc++-v3/ChangeLog:
> >
> > PR tree-optimization/103827
> > PR tree-optimization/80331
> > PR tree-optimization/87502
> >
> > * config/abi/pre/gnu.ver: Add version for _M_construct<bool>
> > * include/bits/basic_string.h: (basic_string::_M_construct<bool>):
> > Declare.
> > (basic_string constructors): Use it.
> > * include/bits/basic_string.tcc:
> > (basic_string::_M_construct<bool>): New template.
> > * src/c++11/string-inst.cc: Instantated S::_M_construct<bool>.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.dg/tree-ssa/pr103827.C: New test.
> > * g++.dg/tree-ssa/pr80331.C: New test.
> > * g++.dg/tree-ssa/pr87502.C: New test.
> >
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103827.C
> > b/gcc/testsuite/g++.dg/tree-ssa/pr103827.C
> > new file mode 100644
> > index 00000000000..6059fe514b1
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103827.C
> > @@ -0,0 +1,22 @@
> > +// { dg-do compile }
> > +// { dg-options "-O1 -fdump-tree-optimized" }
> > +struct foo
> > +{
> > + int a;
> > + void bar() const;
> > + ~foo()
> > + {
> > + if (a != 42)
> > + __builtin_abort ();
> > + }
> > +};
> > +__attribute__ ((noinline))
> > +void test(const struct foo a)
> > +{
> > + int b = a.a;
> > + a.bar();
> > + if (a.a != b)
> > + __builtin_printf ("optimize me away");
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-not "optimize me away" "optimized" } } */
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr80331.C
> > b/gcc/testsuite/g++.dg/tree-ssa/pr80331.C
> > new file mode 100644
> > index 00000000000..85034504f2f
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr80331.C
> > @@ -0,0 +1,8 @@
> > +// { dg-do compile }
> > +// { dg-additional-options "-O2 -fdump-tree-optimized" }
> > +#include<string>
> > +int sain() {
> > + const std::string remove_me("remove_me");
> > + return 0;
> > +}
> > +// { dg-final { scan-tree-dump-not "remove_me" "optimized" } }
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87502.C
> > b/gcc/testsuite/g++.dg/tree-ssa/pr87502.C
> > new file mode 100644
> > index 00000000000..7975432597d
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr87502.C
> > @@ -0,0 +1,16 @@
> > +// { dg-do compile }
> > +// { dg-additional-options "-O2 -fdump-tree-optimized" }
> > +#include <string>
> > +
> > +
> > +__attribute__ ((pure))
> > +extern int foo (const std::string &);
> > +
> > +int
> > +bar ()
> > +{
> > + return foo ("abc") + foo (std::string("abc"));
> > +}
> > +// We used to add terminating zero explicitely instead of using fact
> > +// that memcpy source is already 0 terminated.
> > +// { dg-final { scan-tree-dump-not "remove_me" "= 0;" } }
> > diff --git a/libstdc++-v3/config/abi/pre/gnu.ver
> > b/libstdc++-v3/config/abi/pre/gnu.ver
> > index ae79b371d80..75a6ade1373 100644
> > --- a/libstdc++-v3/config/abi/pre/gnu.ver
> > +++ b/libstdc++-v3/config/abi/pre/gnu.ver
> > @@ -2540,6 +2540,9 @@ GLIBCXX_3.4.34 {
> >
> > _ZNSt8__format25__locale_encoding_to_utf8ERKSt6localeSt17basic_string_viewIcSt11char_traitsIcEEPv;
> > # __sso_string constructor and destructor
> > _ZNSt12__sso_string[CD][12]Ev;
> > + # void std::__cxx11::basic_string<char, std::char_traits<char>,
> > std::allocator<char> >::_M_construct<bool>(char const*, unsigned long)
> > + # and wide char version
> > +
> > _ZNSt7__cxx1112basic_stringI[cw]St11char_traitsI[cw]ESaI[cw]EE12_M_constructILb[01]EEEvPK[cw]m;
> > } GLIBCXX_3.4.33;
> >
> > # Symbols in the support library (libsupc++) have their own tag.
> > diff --git a/libstdc++-v3/include/bits/basic_string.h
> > b/libstdc++-v3/include/bits/basic_string.h
> > index 8369c24d3ae..effc22b8dc9 100644
> > --- a/libstdc++-v3/include/bits/basic_string.h
> > +++ b/libstdc++-v3/include/bits/basic_string.h
> > @@ -341,6 +341,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > void
> > _M_construct(size_type __req, _CharT __c);
> >
> > + // Construct using block of memory of known size.
> > + // If _Terminated is true assume that source is already 0 terminated.
> > + template<bool _Terminated>
> > + _GLIBCXX20_CONSTEXPR inline
>
> This 'inline' is redundant, please remove.
>
> > + void
> > + _M_construct(const _CharT *__c, size_type __n);
> > +
> > _GLIBCXX20_CONSTEXPR
> > allocator_type&
> > _M_get_allocator()
> > @@ -561,8 +568,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > : _M_dataplus(_M_local_data(),
> > * include/bits/vector.tcc:
> >
> > _Alloc_traits::_S_select_on_copy(__str._M_get_allocator()))
> > {
> > - _M_construct(__str._M_data(), __str._M_data() + __str.length(),
> > - std::forward_iterator_tag());
> > + _M_construct<true>(__str._M_data(), __str.length());
> > }
> >
> > // _GLIBCXX_RESOLVE_LIB_DEFECTS
> > @@ -580,8 +586,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > {
> > const _CharT* __start = __str._M_data()
> > + __str._M_check(__pos, "basic_string::basic_string");
> > - _M_construct(__start, __start + __str._M_limit(__pos, npos),
> > - std::forward_iterator_tag());
> > + _M_construct<false>(__start, __str._M_limit(__pos, npos));
> > }
> >
> > /**
> > @@ -597,8 +602,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > {
> > const _CharT* __start = __str._M_data()
> > + __str._M_check(__pos, "basic_string::basic_string");
> > - _M_construct(__start, __start + __str._M_limit(__pos, __n),
> > - std::forward_iterator_tag());
> > + _M_construct<false>(__start, __str._M_limit(__pos, __n));
> > }
> >
> > /**
> > @@ -615,8 +619,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > {
> > const _CharT* __start
> > = __str._M_data() + __str._M_check(__pos, "string::string");
> > - _M_construct(__start, __start + __str._M_limit(__pos, __n),
> > - std::forward_iterator_tag());
> > + _M_construct<false>(__start, __str._M_limit(__pos, __n));
> > }
> >
> > /**
> > @@ -637,7 +640,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > if (__s == 0 && __n > 0)
> > std::__throw_logic_error(__N("basic_string: "
> > "construction from null is not
> > valid"));
> > - _M_construct(__s, __s + __n, std::forward_iterator_tag());
> > + _M_construct<false>(__s, __n);
> > }
> >
> > /**
> > @@ -658,8 +661,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > if (__s == 0)
> > std::__throw_logic_error(__N("basic_string: "
> > "construction from null is not
> > valid"));
> > - const _CharT* __end = __s + traits_type::length(__s);
> > - _M_construct(__s, __end, forward_iterator_tag());
> > + _M_construct<true>(__s, traits_type::length (__s));
> > }
> >
> > /**
> > @@ -723,7 +725,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > _GLIBCXX20_CONSTEXPR
> > basic_string(const basic_string& __str, const _Alloc& __a)
> > : _M_dataplus(_M_local_data(), __a)
> > - { _M_construct(__str.begin(), __str.end(),
> > std::forward_iterator_tag()); }
> > + { _M_construct<true>(__str._M_data(), __str.length()); }
> >
> > _GLIBCXX20_CONSTEXPR
> > basic_string(basic_string&& __str, const _Alloc& __a)
> > @@ -748,7 +750,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> > __str._M_set_length(0);
> > }
> > else
> > - _M_construct(__str.begin(), __str.end(),
> > std::forward_iterator_tag());
> > + _M_construct<true>(__str._M_data(), __str.length());
> > }
> > #endif // C++11
> >
> > diff --git a/libstdc++-v3/include/bits/basic_string.tcc
> > b/libstdc++-v3/include/bits/basic_string.tcc
> > index caeddaf2f5b..395010aa432 100644
> > --- a/libstdc++-v3/include/bits/basic_string.tcc
> > +++ b/libstdc++-v3/include/bits/basic_string.tcc
> > @@ -275,6 +275,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >
> > _M_set_length(__n);
> > }
>
> Newline between functions please.
>
> OK with those two changes.
Looking back through my inbox, this one doesn't seem to have been
pushed. Was it superseded by something else, or is it just waiting for
stage 1 now?
>
>
> > + // Length of string constructed is easier to propagate inter-procedurally
> > + // than difference between iterators.
> > + template<typename _CharT, typename _Traits, typename _Alloc>
> > + template<bool _Terminated>
> > + _GLIBCXX20_CONSTEXPR inline
>
> The 'inline' here is not redundant, so keep this one.
>
> > + void
> > + basic_string<_CharT, _Traits, _Alloc>::
> > + _M_construct(const _CharT *__str, size_type __n)
> > + {
> > + if (__n > size_type(_S_local_capacity))
> > + {
> > + _M_data(_M_create(__n, size_type(0)));
> > + _M_capacity(__n);
> > + }
> > + else
> > + _M_init_local_buf();
> > +
> > + if (__n || _Terminated)
> > + this->_S_copy(_M_data(), __str, __n + _Terminated);
> > +
> > + _M_length(__n);
> > + if (!_Terminated)
> > + traits_type::assign(_M_data()[__n], _CharT());
> > + }
> >
> > template<typename _CharT, typename _Traits, typename _Alloc>
> > _GLIBCXX20_CONSTEXPR
> > diff --git a/libstdc++-v3/src/c++11/string-inst.cc
> > b/libstdc++-v3/src/c++11/string-inst.cc
> > index ec5ba41ebcd..4be626806bf 100644
> > --- a/libstdc++-v3/src/c++11/string-inst.cc
> > +++ b/libstdc++-v3/src/c++11/string-inst.cc
> > @@ -91,6 +91,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > void
> > S::_M_construct(const C*, const C*, forward_iterator_tag);
> >
> > + template
> > + void
> > + S::_M_construct<false>(const C*, size_t);
> > +
> > + template
> > + void
> > + S::_M_construct<true>(const C*, size_t);
> > +
> > #else // !_GLIBCXX_USE_CXX11_ABI
> >
> > template
> >