Hi all,
I've been experimenting with sorting recently and I have noticed a
possibility to slightly optimize the sorting of std::pair values using
the default < operator. This is, I believe, a common usage case to
retrieve sorting indices (better locality of reference than sorting
via pointers). At least I usually do it that way :)
The idea is simple: we know how the < operator for std::pairs looks
like; basically it's
x.first < y.first || (! (y.first < x.first) && x.second < y.second)
to reduce conditional branching in comparisons, we can delay
evaluating the secondary conditions like this:
1. sort by first components
2. identify identical runs
3. sort each run by second components
The attached source contains a proposed simple sort_pairs
implementation plus a benchmark code.
Using g++ -O3 -march=native, on my Core 2 Duo @ 2.83 GHz, I get for
instance the following results:
ha...@hajek:~/devel/scratch> ./a.out
test int, int:
timings: sort_pairs = 0.47 sec std::sort = 0.51 sec speed-up = 8.51064%
results identical
test int, int with small range:
timings: sort_pairs = 0.52 sec std::sort = 0.6 sec speed-up = 15.3846%
results identical
test double, int:
timings: sort_pairs = 0.6 sec std::sort = 0.64 sec speed-up = 6.66666%
results identical
test double, int with small range:
timings: sort_pairs = 0.68 sec std::sort = 0.71 sec speed-up = 4.41176%
results identical
and in general the speed-ups vary considerably (it's random data), but
I failed to produce anything below 3%.
The question is whether this slight optimization is worth including in
GCC's C++ library - std::sort could dispatch to sort_pairs
automatically when iterator's ValueType is a std::pair instance using
a trait mechanism (not yet implemented).
best regards and thanks for GCC
--
RNDr. Jaroslav Hajek, PhD
computing expert & GNU Octave developer
Aeronautical Research and Test Institute (VZLU)
Prague, Czech Republic
url: www.highegg.matfyz.cz
// Copyright (C) 2010 Jaroslav Hajek
#include <iostream>
#include <ctime>
#include <vector>
#include <algorithm>
#include <iterator>
#include <utility>
namespace std
{
template <typename _T1, typename _T2>
struct __less_pair_first
{
bool operator ()(const std::pair<_T1, _T2>& __p1,
const std::pair<_T1, _T2>& __p2)
{
return __p1.first < __p2.first;
}
};
template <typename _T1, typename _T2>
struct __less_pair_second
{
bool operator ()(const std::pair<_T1, _T2>& __p1,
const std::pair<_T1, _T2>& __p2)
{
return __p1.second < __p2.second;
}
};
template<typename _RandomAccessIterator>
inline void
sort_pairs(_RandomAccessIterator __first, _RandomAccessIterator __last)
{
typedef typename iterator_traits<_RandomAccessIterator>::value_type
_ValueType;
typedef typename _ValueType::first_type _FirstType;
typedef typename _ValueType::second_type _SecondType;
sort(__first, __last, __less_pair_first<_FirstType, _SecondType>());
if (__first == __last)
return;
for (_RandomAccessIterator __next = __first + 1;
__next != __last; __next++)
{
if ((*__first).first < (*__next).first)
{
if (__first + 1 != __next)
sort (__first, __next, __less_pair_second<_FirstType, _SecondType>());
__first = __next;
}
}
if (__first + 1 != __last)
sort (__first, __last, __less_pair_second<_FirstType, _SecondType>());
}
}
template<typename T1, typename T2>
struct generator
{
int i;
const int range;
generator (int r) : i (0), range (r) { }
std::pair<T1, T2> operator () (void) { return std::make_pair (rand () % range, i++); }
};
template<typename T1, typename T2>
void benchmark (int n, int r)
{
std::vector< std::pair<T1, T2> > v;
std::generate_n (std::back_inserter (v), n, generator<T1, T2> (r));
std::vector< std::pair<T1, T2> > w = v;
std::clock_t st = std::clock ();
std::sort_pairs (w.begin (), w.end ());
std::clock_t et = std::clock ();
float t1 = static_cast<float> (et - st)/CLOCKS_PER_SEC;
st = std::clock ();
std::sort (v.begin (), v.end ());
et = std::clock ();
float t2 = static_cast<float> (et - st)/CLOCKS_PER_SEC;
std::cout << "timings: sort_pairs = " << t1 << " sec"
<< " std::sort = " << t2 << " sec" << " speed-up = " << 100*(t2/t1-1) << "%" << std::endl;
if (std::equal (v.begin (), v.end (), w.begin ()))
std::cout << "results identical" << std::endl;
else
std::cout << "results differ!" << std::endl;
}
int main ()
{
srand (time (NULL));
std::cout << "test int, int:\n";
benchmark<int, int> (5000000, 20000000);
std::cout << "test int, int with small range:\n";
benchmark<int, int> (5000000, 50000);
std::cout << "test double, int:\n";
benchmark<double, int> (5000000, 20000000);
std::cout << "test double, int with small range:\n";
benchmark<double, int> (5000000, 50000);
}