Hi all,

I've been experimenting with sorting recently and I have noticed a
possibility to slightly optimize the sorting of std::pair values using
the default < operator. This is, I believe, a common usage case to
retrieve sorting indices (better locality of reference than sorting
via pointers). At least I usually do it that way :)

The idea is simple: we know how the < operator for std::pairs looks
like; basically it's
x.first < y.first || (! (y.first < x.first) && x.second < y.second)

to reduce conditional branching in comparisons, we can delay
evaluating the secondary conditions like this:
1. sort by first components
2. identify identical runs
3. sort each run by second components

The attached source contains a proposed simple sort_pairs
implementation plus a benchmark code.

Using g++ -O3 -march=native, on my Core 2 Duo @ 2.83 GHz, I get for
instance the following results:

ha...@hajek:~/devel/scratch> ./a.out
test int, int:
timings: sort_pairs = 0.47 sec std::sort = 0.51 sec  speed-up = 8.51064%
results identical
test int, int with small range:
timings: sort_pairs = 0.52 sec std::sort = 0.6 sec  speed-up = 15.3846%
results identical
test double, int:
timings: sort_pairs = 0.6 sec std::sort = 0.64 sec  speed-up = 6.66666%
results identical
test double, int with small range:
timings: sort_pairs = 0.68 sec std::sort = 0.71 sec  speed-up = 4.41176%
results identical

and in general the speed-ups vary considerably (it's random data), but
I failed to produce anything below 3%.

The question is whether this slight optimization is worth including in
GCC's C++ library - std::sort could dispatch to sort_pairs
automatically when iterator's ValueType is a std::pair instance using
a trait mechanism (not yet implemented).

best regards and thanks for GCC

-- 
RNDr. Jaroslav Hajek, PhD
computing expert & GNU Octave developer
Aeronautical Research and Test Institute (VZLU)
Prague, Czech Republic
url: www.highegg.matfyz.cz
// Copyright (C) 2010 Jaroslav Hajek
#include <iostream>
#include <ctime>
#include <vector>
#include <algorithm>
#include <iterator>
#include <utility>

namespace std
{
  template <typename _T1, typename _T2>
    struct __less_pair_first
      {
        bool operator ()(const std::pair<_T1, _T2>& __p1,
                         const std::pair<_T1, _T2>& __p2)
          {
            return __p1.first < __p2.first;
          }
      };

  template <typename _T1, typename _T2>
    struct __less_pair_second
      {
        bool operator ()(const std::pair<_T1, _T2>& __p1,
                         const std::pair<_T1, _T2>& __p2)
          {
            return __p1.second < __p2.second;
          }
      };


  template<typename _RandomAccessIterator>
    inline void
    sort_pairs(_RandomAccessIterator __first, _RandomAccessIterator __last)
    {
      typedef typename iterator_traits<_RandomAccessIterator>::value_type
	_ValueType;
      typedef typename _ValueType::first_type _FirstType;
      typedef typename _ValueType::second_type _SecondType;

      sort(__first, __last, __less_pair_first<_FirstType, _SecondType>());
      if (__first == __last)
        return;

      for (_RandomAccessIterator __next = __first + 1; 
           __next != __last; __next++)
        {
          if ((*__first).first < (*__next).first)
            {
              if (__first + 1 != __next)
                sort (__first, __next, __less_pair_second<_FirstType, _SecondType>());
              __first = __next;
            }
        }

      if (__first + 1 != __last)
        sort (__first, __last, __less_pair_second<_FirstType, _SecondType>());
    }
}

template<typename T1, typename T2>
struct generator
{
  int i;
  const int range;
  generator (int r) : i (0), range (r) { }
  std::pair<T1, T2> operator () (void) { return std::make_pair (rand () % range, i++); }
};

template<typename T1, typename T2>
void benchmark (int n, int r)
{
  std::vector< std::pair<T1, T2> > v;
  std::generate_n (std::back_inserter (v), n, generator<T1, T2> (r));

  std::vector< std::pair<T1, T2> > w = v;

  std::clock_t st = std::clock ();

  std::sort_pairs (w.begin (), w.end ());

  std::clock_t et = std::clock ();
  float t1 = static_cast<float> (et - st)/CLOCKS_PER_SEC;

  st = std::clock ();

  std::sort (v.begin (), v.end ());

  et = std::clock ();
  float t2 = static_cast<float> (et - st)/CLOCKS_PER_SEC;

  std::cout << "timings: sort_pairs = " << t1 << " sec"
    << " std::sort = " << t2 << " sec" << "  speed-up = " << 100*(t2/t1-1) << "%" << std::endl;

  if (std::equal (v.begin (), v.end (), w.begin ()))
    std::cout << "results identical" << std::endl;
  else
    std::cout << "results differ!" << std::endl;
}

int main ()
{
  srand (time (NULL));

  std::cout << "test int, int:\n";
  benchmark<int, int> (5000000, 20000000);

  std::cout << "test int, int with small range:\n";
  benchmark<int, int> (5000000, 50000);

  std::cout << "test double, int:\n";
  benchmark<double, int> (5000000, 20000000);

  std::cout << "test double, int with small range:\n";
  benchmark<double, int> (5000000, 50000);
}

Reply via email to