On Tue, Mar 25, 2025 at 5:30 PM Jonathan Wakely <[email protected]> wrote:
> LWG 3291 make std::ranges::iota_view's iterator have input_iterator_tag
> as its iterator_category, even though it satisfies the C++20
> std::forward_iterator concept. This means that the traditional
> std::vector::vector(InputIterator, InputIterator) constructor treats
> iota_view iterators as input iterators, because it only understands the
> C++17 iterator requirements, not the C++20 iterator concepts. This
> results in a loop that calls emplace_back for each individual element of
> the iota_view, requiring the vector to reallocate repeatedly as the
> values are inserted. This makes it unnecessarily slow to construct a
> vector from an iota_view.
>
> This change adds a new _M_range_initialize_n function for initializing a
> vector from a range (which doesn't have to be common) and a size. This
> new function can be used by vector(InputIterator, InputIterator) and
> vector(from_range_t, R&&) when std::ranges::distance can be used to get
> the size. It can also be used by the _M_range_initialize overload that
> gets the size for a Cpp17ForwardIterator pair using std::distance, and
> by the vector(initializer_list) constructor.
>
> With this new function constructing a std::vector from iota_view does
> a single allocation of the correct size and so doesn't need to
> reallocate in a loop.
>
> libstdc++-v3/ChangeLog:
>
> PR libstdc++/108487
> * include/bits/stl_vector.h (vector(initializer_list)): Call
> _M_range_initialize_n instead of _M_range_initialize.
> (vector(InputIterator, InputIterator)): Use _M_range_initialize_n
> for C++20 sized sentinels and forward iterators.
> (vector(from_range_t, R&&)): Use _M_range_initialize_n for sized
> ranges and forward ranges.
> (vector::_M_range_initialize(FwIt, FwIt, forward_iterator_tag)):
> Likewise.
> (vector::_M_range_initialize_n): New function.
> * testsuite/23_containers/vector/cons/108487.cc: New test.
> ---
>
> Tests running for x86_64-linux.
>
> This gives a 10x speed up for the PR108487 testcase using iota_view.
>
> I don't see why doing this wouldn't be allowed by the standard, so it
> seems worth doing.
>
> libstdc++-v3/include/bits/stl_vector.h | 48 ++++++++++++-------
> .../23_containers/vector/cons/108487.cc | 24 ++++++++++
> 2 files changed, 56 insertions(+), 16 deletions(-)
> create mode 100644
> libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
>
> diff --git a/libstdc++-v3/include/bits/stl_vector.h
> b/libstdc++-v3/include/bits/stl_vector.h
> index 21f6cd04f49..458adc987da 100644
> --- a/libstdc++-v3/include/bits/stl_vector.h
> +++ b/libstdc++-v3/include/bits/stl_vector.h
> @@ -65,6 +65,9 @@
> #if __cplusplus >= 202002L
> # include <compare>
> #endif
> +#if __glibcxx_concepts // C++ >= C++20
> +# include <bits/ranges_base.h> // ranges::distance
> +#endif
> #if __glibcxx_ranges_to_container // C++ >= 23
> # include <bits/ranges_algobase.h> // ranges::copy
> # include <bits/ranges_util.h> // ranges::subrange
> @@ -706,8 +709,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
> const allocator_type& __a = allocator_type())
> : _Base(__a)
> {
> - _M_range_initialize(__l.begin(), __l.end(),
> - random_access_iterator_tag());
> + _M_range_initialize_n(__l.begin(), __l.end(), __l.size());
> }
> #endif
>
> @@ -735,6 +737,17 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
> const allocator_type& __a = allocator_type())
> : _Base(__a)
> {
> +#if __glibcxx_concepts // C++ >= C++20
> + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>
> + || forward_iterator<_InputIterator>)
> + {
> + const auto __n
> + = static_cast<size_type>(ranges::distance(__first,
> __last));
> + _M_range_initialize_n(__first, __last, __n);
> + return;
> + }
> + else
> +#endif
> _M_range_initialize(__first, __last,
> std::__iterator_category(__first));
> }
> @@ -763,13 +776,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
> {
> if constexpr (ranges::forward_range<_Rg> ||
> ranges::sized_range<_Rg>)
> {
> - const auto __n = size_type(ranges::distance(__rg));
> - pointer __start =
> - this->_M_allocate(_S_check_init_len(__n,
> -
> _M_get_Tp_allocator()));
> - this->_M_impl._M_finish = this->_M_impl._M_start = __start;
> - this->_M_impl._M_end_of_storage = __start + __n;
> - _Base::_M_append_range(__rg);
> + const auto __n =
> static_cast<size_type>(ranges::distance(__rg));
> + _M_range_initialize_n(ranges::begin(__rg), ranges::end(__rg),
> + __n);
> }
> else
> {
> @@ -1962,15 +1971,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
> _M_range_initialize(_ForwardIterator __first, _ForwardIterator
> __last,
> std::forward_iterator_tag)
> {
> - const size_type __n = std::distance(__first, __last);
> - pointer __start =
> + _M_range_initialize_n(__first, __last,
> + std::distance(__first, __last));
> + }
> +
> + template<typename _Iterator, typename _Sentinel>
> + _GLIBCXX20_CONSTEXPR
> + void
> + _M_range_initialize_n(_Iterator __first, _Sentinel __last,
> + size_type __n)
> + {
> + pointer __start = this->_M_impl._M_start =
> this->_M_allocate(_S_check_init_len(__n,
> _M_get_Tp_allocator()));
> - _Guard_alloc __guard(__start, __n, *this);
> - this->_M_impl._M_finish = std::__uninitialized_copy_a
> - (__first, __last, __start, _M_get_Tp_allocator());
> - this->_M_impl._M_start = __start;
> - (void) __guard._M_release();
> this->_M_impl._M_end_of_storage = __start + __n;
> + this->_M_impl._M_finish
> + = std::__uninitialized_copy_a(_GLIBCXX_MOVE(__first), __last,
> + __start,
> _M_get_Tp_allocator());
>
Should we guard the allocation using _Guard_alloc here, as it was done
previously for _M_range_initialize?
> }
>
> // Called by the first initialize_dispatch above and by the
> diff --git a/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
> b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
> new file mode 100644
> index 00000000000..13f2c478e79
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
> @@ -0,0 +1,24 @@
> +// { dg-do run { target c++20 } }
> +// Bug libstdc++/108487
> +// ~20-30x slowdown in populating std::vector from std::ranges::iota_view
> +
> +#include <ranges>
> +#include <testsuite_hooks.h>
> +#include <testsuite_allocator.h>
> +
> +void
> +test_pr108487()
> +{
> + using __gnu_test::tracker_allocator;
> + using __gnu_test::tracker_allocator_counter;
> + auto r = std::ranges::iota_view{0, 20};
> + tracker_allocator_counter::reset();
> + std::vector<int, tracker_allocator<int>> v{r.begin(), r.end()};
> + const std::size_t bytes = v.capacity() * sizeof(v.front());
> + VERIFY( tracker_allocator_counter::get_allocation_count() == bytes );
> +}
> +
> +int main()
> +{
> + test_pr108487();
> +}
> --
> 2.49.0
>
>