On Tue, Mar 25, 2025 at 5:30 PM Jonathan Wakely <jwak...@redhat.com> wrote:
> LWG 3291 make std::ranges::iota_view's iterator have input_iterator_tag > as its iterator_category, even though it satisfies the C++20 > std::forward_iterator concept. This means that the traditional > std::vector::vector(InputIterator, InputIterator) constructor treats > iota_view iterators as input iterators, because it only understands the > C++17 iterator requirements, not the C++20 iterator concepts. This > results in a loop that calls emplace_back for each individual element of > the iota_view, requiring the vector to reallocate repeatedly as the > values are inserted. This makes it unnecessarily slow to construct a > vector from an iota_view. > > This change adds a new _M_range_initialize_n function for initializing a > vector from a range (which doesn't have to be common) and a size. This > new function can be used by vector(InputIterator, InputIterator) and > vector(from_range_t, R&&) when std::ranges::distance can be used to get > the size. It can also be used by the _M_range_initialize overload that > gets the size for a Cpp17ForwardIterator pair using std::distance, and > by the vector(initializer_list) constructor. > > With this new function constructing a std::vector from iota_view does > a single allocation of the correct size and so doesn't need to > reallocate in a loop. > > libstdc++-v3/ChangeLog: > > PR libstdc++/108487 > * include/bits/stl_vector.h (vector(initializer_list)): Call > _M_range_initialize_n instead of _M_range_initialize. > (vector(InputIterator, InputIterator)): Use _M_range_initialize_n > for C++20 sized sentinels and forward iterators. > (vector(from_range_t, R&&)): Use _M_range_initialize_n for sized > ranges and forward ranges. > (vector::_M_range_initialize(FwIt, FwIt, forward_iterator_tag)): > Likewise. > (vector::_M_range_initialize_n): New function. > * testsuite/23_containers/vector/cons/108487.cc: New test. > --- > > Tests running for x86_64-linux. > > This gives a 10x speed up for the PR108487 testcase using iota_view. > > I don't see why doing this wouldn't be allowed by the standard, so it > seems worth doing. > > libstdc++-v3/include/bits/stl_vector.h | 48 ++++++++++++------- > .../23_containers/vector/cons/108487.cc | 24 ++++++++++ > 2 files changed, 56 insertions(+), 16 deletions(-) > create mode 100644 > libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc > > diff --git a/libstdc++-v3/include/bits/stl_vector.h > b/libstdc++-v3/include/bits/stl_vector.h > index 21f6cd04f49..458adc987da 100644 > --- a/libstdc++-v3/include/bits/stl_vector.h > +++ b/libstdc++-v3/include/bits/stl_vector.h > @@ -65,6 +65,9 @@ > #if __cplusplus >= 202002L > # include <compare> > #endif > +#if __glibcxx_concepts // C++ >= C++20 > +# include <bits/ranges_base.h> // ranges::distance > +#endif > #if __glibcxx_ranges_to_container // C++ >= 23 > # include <bits/ranges_algobase.h> // ranges::copy > # include <bits/ranges_util.h> // ranges::subrange > @@ -706,8 +709,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > const allocator_type& __a = allocator_type()) > : _Base(__a) > { > - _M_range_initialize(__l.begin(), __l.end(), > - random_access_iterator_tag()); > + _M_range_initialize_n(__l.begin(), __l.end(), __l.size()); > } > #endif > > @@ -735,6 +737,17 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > const allocator_type& __a = allocator_type()) > : _Base(__a) > { > +#if __glibcxx_concepts // C++ >= C++20 > + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator> > + || forward_iterator<_InputIterator>) > + { > + const auto __n > + = static_cast<size_type>(ranges::distance(__first, > __last)); > + _M_range_initialize_n(__first, __last, __n); > + return; > + } > + else > +#endif > _M_range_initialize(__first, __last, > std::__iterator_category(__first)); > } > @@ -763,13 +776,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > { > if constexpr (ranges::forward_range<_Rg> || > ranges::sized_range<_Rg>) > { > - const auto __n = size_type(ranges::distance(__rg)); > - pointer __start = > - this->_M_allocate(_S_check_init_len(__n, > - > _M_get_Tp_allocator())); > - this->_M_impl._M_finish = this->_M_impl._M_start = __start; > - this->_M_impl._M_end_of_storage = __start + __n; > - _Base::_M_append_range(__rg); > + const auto __n = > static_cast<size_type>(ranges::distance(__rg)); > + _M_range_initialize_n(ranges::begin(__rg), ranges::end(__rg), > + __n); > } > else > { > @@ -1962,15 +1971,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > _M_range_initialize(_ForwardIterator __first, _ForwardIterator > __last, > std::forward_iterator_tag) > { > - const size_type __n = std::distance(__first, __last); > - pointer __start = > + _M_range_initialize_n(__first, __last, > + std::distance(__first, __last)); > + } > + > + template<typename _Iterator, typename _Sentinel> > + _GLIBCXX20_CONSTEXPR > + void > + _M_range_initialize_n(_Iterator __first, _Sentinel __last, > + size_type __n) > + { > + pointer __start = this->_M_impl._M_start = > this->_M_allocate(_S_check_init_len(__n, > _M_get_Tp_allocator())); > - _Guard_alloc __guard(__start, __n, *this); > - this->_M_impl._M_finish = std::__uninitialized_copy_a > - (__first, __last, __start, _M_get_Tp_allocator()); > - this->_M_impl._M_start = __start; > - (void) __guard._M_release(); > this->_M_impl._M_end_of_storage = __start + __n; > + this->_M_impl._M_finish > + = std::__uninitialized_copy_a(_GLIBCXX_MOVE(__first), __last, > + __start, > _M_get_Tp_allocator()); > Should we guard the allocation using _Guard_alloc here, as it was done previously for _M_range_initialize? > } > > // Called by the first initialize_dispatch above and by the > diff --git a/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc > b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc > new file mode 100644 > index 00000000000..13f2c478e79 > --- /dev/null > +++ b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc > @@ -0,0 +1,24 @@ > +// { dg-do run { target c++20 } } > +// Bug libstdc++/108487 > +// ~20-30x slowdown in populating std::vector from std::ranges::iota_view > + > +#include <ranges> > +#include <testsuite_hooks.h> > +#include <testsuite_allocator.h> > + > +void > +test_pr108487() > +{ > + using __gnu_test::tracker_allocator; > + using __gnu_test::tracker_allocator_counter; > + auto r = std::ranges::iota_view{0, 20}; > + tracker_allocator_counter::reset(); > + std::vector<int, tracker_allocator<int>> v{r.begin(), r.end()}; > + const std::size_t bytes = v.capacity() * sizeof(v.front()); > + VERIFY( tracker_allocator_counter::get_allocation_count() == bytes ); > +} > + > +int main() > +{ > + test_pr108487(); > +} > -- > 2.49.0 > >