Hi all, I've been experimenting with sorting recently and I have noticed a possibility to slightly optimize the sorting of std::pair values using the default < operator. This is, I believe, a common usage case to retrieve sorting indices (better locality of reference than sorting via pointers). At least I usually do it that way :)
The idea is simple: we know how the < operator for std::pairs looks like; basically it's x.first < y.first || (! (y.first < x.first) && x.second < y.second) to reduce conditional branching in comparisons, we can delay evaluating the secondary conditions like this: 1. sort by first components 2. identify identical runs 3. sort each run by second components The attached source contains a proposed simple sort_pairs implementation plus a benchmark code. Using g++ -O3 -march=native, on my Core 2 Duo @ 2.83 GHz, I get for instance the following results: ha...@hajek:~/devel/scratch> ./a.out test int, int: timings: sort_pairs = 0.47 sec std::sort = 0.51 sec speed-up = 8.51064% results identical test int, int with small range: timings: sort_pairs = 0.52 sec std::sort = 0.6 sec speed-up = 15.3846% results identical test double, int: timings: sort_pairs = 0.6 sec std::sort = 0.64 sec speed-up = 6.66666% results identical test double, int with small range: timings: sort_pairs = 0.68 sec std::sort = 0.71 sec speed-up = 4.41176% results identical and in general the speed-ups vary considerably (it's random data), but I failed to produce anything below 3%. The question is whether this slight optimization is worth including in GCC's C++ library - std::sort could dispatch to sort_pairs automatically when iterator's ValueType is a std::pair instance using a trait mechanism (not yet implemented). best regards and thanks for GCC -- RNDr. Jaroslav Hajek, PhD computing expert & GNU Octave developer Aeronautical Research and Test Institute (VZLU) Prague, Czech Republic url: www.highegg.matfyz.cz
// Copyright (C) 2010 Jaroslav Hajek #include <iostream> #include <ctime> #include <vector> #include <algorithm> #include <iterator> #include <utility> namespace std { template <typename _T1, typename _T2> struct __less_pair_first { bool operator ()(const std::pair<_T1, _T2>& __p1, const std::pair<_T1, _T2>& __p2) { return __p1.first < __p2.first; } }; template <typename _T1, typename _T2> struct __less_pair_second { bool operator ()(const std::pair<_T1, _T2>& __p1, const std::pair<_T1, _T2>& __p2) { return __p1.second < __p2.second; } }; template<typename _RandomAccessIterator> inline void sort_pairs(_RandomAccessIterator __first, _RandomAccessIterator __last) { typedef typename iterator_traits<_RandomAccessIterator>::value_type _ValueType; typedef typename _ValueType::first_type _FirstType; typedef typename _ValueType::second_type _SecondType; sort(__first, __last, __less_pair_first<_FirstType, _SecondType>()); if (__first == __last) return; for (_RandomAccessIterator __next = __first + 1; __next != __last; __next++) { if ((*__first).first < (*__next).first) { if (__first + 1 != __next) sort (__first, __next, __less_pair_second<_FirstType, _SecondType>()); __first = __next; } } if (__first + 1 != __last) sort (__first, __last, __less_pair_second<_FirstType, _SecondType>()); } } template<typename T1, typename T2> struct generator { int i; const int range; generator (int r) : i (0), range (r) { } std::pair<T1, T2> operator () (void) { return std::make_pair (rand () % range, i++); } }; template<typename T1, typename T2> void benchmark (int n, int r) { std::vector< std::pair<T1, T2> > v; std::generate_n (std::back_inserter (v), n, generator<T1, T2> (r)); std::vector< std::pair<T1, T2> > w = v; std::clock_t st = std::clock (); std::sort_pairs (w.begin (), w.end ()); std::clock_t et = std::clock (); float t1 = static_cast<float> (et - st)/CLOCKS_PER_SEC; st = std::clock (); std::sort (v.begin (), v.end ()); et = std::clock (); float t2 = static_cast<float> (et - st)/CLOCKS_PER_SEC; std::cout << "timings: sort_pairs = " << t1 << " sec" << " std::sort = " << t2 << " sec" << " speed-up = " << 100*(t2/t1-1) << "%" << std::endl; if (std::equal (v.begin (), v.end (), w.begin ())) std::cout << "results identical" << std::endl; else std::cout << "results differ!" << std::endl; } int main () { srand (time (NULL)); std::cout << "test int, int:\n"; benchmark<int, int> (5000000, 20000000); std::cout << "test int, int with small range:\n"; benchmark<int, int> (5000000, 50000); std::cout << "test double, int:\n"; benchmark<double, int> (5000000, 20000000); std::cout << "test double, int with small range:\n"; benchmark<double, int> (5000000, 50000); }