[Bug libstdc++/108823] ranges::transform could be smarter with two sized ranges

redi at gcc dot gnu.org via Gcc-bugs Wed, 26 Feb 2025 07:56:21 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108823


--- Comment #1 from Jonathan Wakely <redi at gcc dot gnu.org> ---
So maybe something like:

--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -758,11 +758,21 @@ namespace ranges
                 _Out __result, _Fp __binary_op,
                 _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const
       {
-       for (; __first1 != __last1 && __first2 != __last2;
-            ++__first1, (void)++__first2, ++__result)
-         *__result = std::__invoke(__binary_op,
-                                   std::__invoke(__proj1, *__first1),
-                                   std::__invoke(__proj2, *__first2));
+       if constexpr (sized_sentinel_for<_Sent1, _Iter1>
+                       && sized_sentinel_for<_Sent2, _Iter2>)
+         for (auto __sz = std::min<size_t>(__last1 - __first1,
+                                           __last2 - __first2);
+              __sz--;
+              ++__first1, (void)++__first2, ++__result)
+           *__result = std::__invoke(__binary_op,
+                                     std::__invoke(__proj1, *__first1),
+                                     std::__invoke(__proj2, *__first2));
+       else
+         for (; __first1 != __last1 && __first2 != __last2;
+              ++__first1, (void)++__first2, ++__result)
+           *__result = std::__invoke(__binary_op,
+                                     std::__invoke(__proj1, *__first1),
+                                     std::__invoke(__proj2, *__first2));
        return {std::move(__first1), std::move(__first2), std::move(__result)};
       }

@@ -778,10 +788,28 @@ namespace ranges
       operator()(_Range1&& __r1, _Range2&& __r2, _Out __result, _Fp __binary_op,
                 _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const
       {
-       return (*this)(ranges::begin(__r1), ranges::end(__r1),
-                      ranges::begin(__r2), ranges::end(__r2),
-                      std::move(__result), std::move(__binary_op),
-                      std::move(__proj1), std::move(__proj2));
+       if constexpr (sized_range<_Range1> && sized_range<_Range2>
+                       && (random_access_range<_Range1>
+                             || random_access_range<_Range2>))
+         {
+           auto __sz = std::min<size_t>(ranges::size(__r1),
+                                        ranges::size(__r2));
+           auto __b1 = ranges::begin(__r1);
+           auto __b2 = ranges::begin(__r2);
+           if constexpr (random_access_range<_Range1>)
+             return (*this)(__b1, __b1 + __sz, __b2, unreachable_sentinel,
+                            std::move(__result), std::move(__binary_op),
+                            std::move(__proj1), std::move(__proj2));
+           else
+             return (*this)(__b1, unreachable_sentinel, __b2, __b2 + __sz,
+                            std::move(__result), std::move(__binary_op),
+                            std::move(__proj1), std::move(__proj2));
+         }
+       else
+         return (*this)(ranges::begin(__r1), ranges::end(__r1),
+                        ranges::begin(__r2), ranges::end(__r2),
+                        std::move(__result), std::move(__binary_op),
+                        std::move(__proj1), std::move(__proj2));
       }
   };


So ranges::transform(i1, s1, i2, s2, out, op) calculates the loop count first
if sized_sentinel_for is satisfied for both ranges, and then loops that many
times instead of testing i1 != s1 && i2 != s2 on every iteration.

And ranges::transform(r1, r2, out, op) calculates the loop count using
ranges::size on both ranges (when both are sized ranges) and uses your
unreachable_sentinel trick if either of the ranges is random access. This way
we can simplify the i1 != s1 && i2 != s2 check even if only one of the ranges
is random access, e.g. when transforming a std::vector and a std::list. In that
mixed case we probably won't get vectorization because the iterators aren't
pointers, but it might still be beneficial to simplify the loop condition.

[Bug libstdc++/108823] ranges::transform could be smarter with two sized ranges

Reply via email to