DIVYA updated this revision to Diff 111027.

https://reviews.llvm.org/D36423

Files:
  benchmarks/algorithms.bench.cpp
  include/algorithm
  include/rng_utils.h
  include/test_configs.h
  include/test_utils.h

Index: include/test_utils.h
===================================================================
--- /dev/null
+++ include/test_utils.h
@@ -0,0 +1,270 @@
+#ifndef TEST_UTILS_H
+#define TEST_UTILS_H
+
+#include "rng_utils.h"
+#include<cstdlib>
+#include<algorithm>
+
+// TODO: Add more aggregates.
+struct aggregate {
+  int first;
+  int second;
+  int third;
+  int fourth;
+  aggregate() : first(0), second(0), third(0), fourth(0)
+  {}
+  // This is a hacky constructor for ::find on associative containers to work.
+  aggregate(int i)
+    : first(i), second(i), third(i), fourth(i)
+  {}
+  aggregate(int i, int j, int k, int l)
+    : first(i), second(j), third(k), fourth(l)
+  {}
+
+  aggregate& operator++() {
+    ++first;
+    ++second;
+    ++third;
+    ++fourth;
+    return *this;
+  }
+  aggregate operator++(int) {
+    aggregate N(*this);
+    ++(*this);
+    return N;
+  }
+
+  bool operator<(const aggregate& i) const {
+    return first < i.first;
+  }
+
+  bool operator>(const aggregate& i) const {
+    return i < *this;
+  }
+
+  bool operator==(const aggregate& i) const {
+    return first == i.first;
+  }
+
+  bool operator!=(const aggregate& i) const {
+    return !(*this == i);
+  }
+};
+
+// Hasher for aggregate data type.
+namespace std {
+  template <>
+  struct hash<aggregate>
+  {
+    std::size_t operator()(const aggregate& k) const
+    {
+      using std::hash;
+      // Hash and combine using bit-shift.
+      return ((hash<int>()(k.first)
+               ^ (hash<int>()(k.second) << 1)) >> 1)
+               ^ (hash<int>()(k.third) << 1)
+               ^ (hash<int>()(k.fourth) << 1);
+    }
+  };
+}
+
+template<typename T>
+struct remove_const { typedef T type; };
+
+// value_type of a std::map is std::pair<const KeyType, ValueType>
+template<typename T>
+struct remove_const<std::pair<const T, T>> { typedef std::pair<T, T> type; };
+
+template<typename T>
+T get_rand(random_device &r, int max) {
+  return r.get_rand(T(0), T(max));
+}
+
+template<>
+std::pair<int, int> get_rand<std::pair<int, int>>(random_device &r, int max) {
+  return std::make_pair(r.get_rand(0, max), r.get_rand(0, max));
+}
+
+template<>
+aggregate get_rand<aggregate>(random_device &r, int max) {
+  return aggregate(r.get_rand(0, max));
+}
+
+template<>
+std::pair<aggregate, aggregate>
+get_rand<std::pair<aggregate, aggregate>>(random_device &r, int max) {
+  return std::make_pair(r.get_rand(0, max), r.get_rand(0, max));
+}
+
+template<typename T>
+T increment(T &i) {
+  return ++i;
+}
+
+// value_type of a std::map is std::pair<const KeyType, ValueType>
+template<>
+std::pair<int, int> increment<std::pair<int, int>>(std::pair<int, int> &i) {
+  return std::make_pair(++i.first, i.second);
+}
+
+template<>
+std::pair<aggregate, aggregate>
+increment<std::pair<aggregate, aggregate>>(std::pair<aggregate, aggregate> &i) {
+  return std::make_pair(++i.first, i.second);
+}
+
+template<typename T>
+T init() {
+  return T(0);
+}
+
+template<>
+std::pair<int, int> init<std::pair<int, int>>() {
+  return std::make_pair(0, 0);
+}
+
+template<>
+std::pair<aggregate, aggregate> init<std::pair<aggregate, aggregate>>() {
+  return std::make_pair(0, 0);
+}
+
+template <template <class, class> class Container, class value_type>
+void fill_random(Container<value_type, std::allocator<value_type>> &v,
+                int max = RAND_MAX) {
+  random_device r;
+  for (auto &e : v)
+    e = get_rand<value_type>(r, max);
+}
+
+template <typename T>
+void fill_random(T begin, T end, int max = RAND_MAX) {
+  typedef typename std::iterator_traits<T>::value_type value_type;
+  random_device r;
+  for (auto it = begin; it != end; ++it)
+    *it = get_rand<value_type>(r, max);
+}
+
+// It can work with char* or std::string.
+template<typename T>
+void fill_random_chars(T begin, T end, bool upper) {
+  char max = upper ? 'Z' : 'z';
+  char min = upper ? 'A' : 'a';
+  auto it = begin;
+  typedef typename std::iterator_traits<T>::value_type value_type;
+  random_device r;
+  for (; it != end -1; ++it) {
+    *it = get_rand<value_type>(r, max) * (max - min) + min;
+    assert(*it >= min);
+    assert(*it <= max);
+  }
+  *it = '\0';
+}
+
+// TODO: Create a template class such that all the
+// APIs of STL containers can be exercised in a concise way.
+// for example insert, push_back, pop_back, push_front, pop_front, advance
+
+// TODO: Benchmark memory allocated on heap vs. stack.
+template <typename T>
+void fill_seq(T begin, T end) {
+  typedef typename std::iterator_traits<T>::value_type value_type;
+  //random_device r;
+  value_type j = init<value_type>();// = get_rand<value_type>(r, RAND_MAX);
+  for (auto it = begin; it != end; ++it, increment(j))
+    *it = j;
+}
+
+template <template <class, class> class Container, class value_type>
+void fill_seq(Container<value_type, std::allocator<value_type>> &v) {
+  fill_seq(std::begin(v), std::end(v));
+}
+
+// Size of vector \p v to be constructed is \p size
+template <typename T>
+void make_killer(int size, std::vector<T>& v) {
+  int candidate = 0;
+  int num_solid = 0;
+  int gas = size - 1;
+
+  std::vector<T> tmp(size);
+  v.resize(size);
+
+  for (T i = 0; i < size; ++i) {
+    tmp[i] = i;
+    v[i] = gas;
+  }
+
+  std::sort(tmp.begin(), tmp.end(), [&](T x, T y) {
+      if (v[x] == gas && v[y] == gas) {
+        if (x == candidate) v[x] = num_solid++;
+        else v[y] = num_solid++;
+      }
+
+      if (v[x] == gas) candidate = x;
+      else if (v[y] == gas) candidate = y;
+
+      return v[x] < v[y];
+    });
+}
+
+// c-style comparator for integral types.
+template<typename T>
+static int compare(const void *a, const void *b)
+{
+  static_assert(std::is_integral<T>::value, "Not an integral type.");
+  return (*(T*)a - *(T*)b);
+}
+
+// Custom allocator to manage memory yet keep C like semantics
+// for building C standard library benchmarks.
+template<typename T>
+class c_alloc {
+  public:
+    c_alloc(int N) : p((T*)malloc(N*sizeof(T))) {}
+    ~c_alloc() { free(p); }
+    // Dangerous but simple if used properly.
+    T* get() { return p; }
+    operator T*() { return p; }
+    T* operator+(int N) { return p+N; }
+    const T* operator+(int N) const { return p+N; }
+    const T& operator[] (int N) const { return p[N]; }
+    T& operator[] (int N) { return p[N]; }
+  private:
+    T* p;
+};
+
+// Call a unary function for N real numbers
+#define BM_unary_real(Name) template<typename T> \
+void BM_##Name(benchmark::State& state) {\
+  int N = state.range(0);\
+  c_alloc<T> a(N);\
+  fill_random(a.get(), a.get()+N);\
+  int i = 0;\
+  while (state.KeepRunning()) {\
+    T p = Name(a[i]);\
+    benchmark::DoNotOptimize(p);\
+    if (i++ == N)\
+      i = 0;\
+  }\
+  state.SetComplexityN(N);\
+}
+
+// Call a binary function for N real numbers
+#define BM_binary_real(Name) template<typename T> \
+void BM_##Name(benchmark::State& state) {\
+  int N = state.range(0);\
+  c_alloc<T> a(N);\
+  fill_random(a.get(), a.get()+N);\
+  random_device r;\
+  int i = 0;\
+  while (state.KeepRunning()) {\
+    T p = Name(a[i], get_rand<int>(r, RAND_MAX));\
+    benchmark::DoNotOptimize(p);\
+    if (i++ == N)\
+      i = 0;\
+  }\
+  state.SetComplexityN(N);\
+}
+
+#endif // TEST_UTILS_H
+
Index: include/test_configs.h
===================================================================
--- /dev/null
+++ include/test_configs.h
@@ -0,0 +1,59 @@
+#ifndef TEST_CONFIGS_H
+#define TEST_CONFIGS_H
+
+#define KB << 10
+#define MB << 20
+#define GB << 30
+
+#define i7_4770
+
+
+// Configurations for i7_4770
+#ifdef i7_4770
+// To benchmark data residing completely in L1 cache.
+#ifdef ENABLE_TRAVIS_BUILD
+#define L1 (32 KB)
+// To benchmark data residing in L2 cache.
+#define L2 (256 KB)
+#else
+// For the Travis CI to run the entire test.
+#define L1 (16 KB)
+#define L2 (32 KB)
+#endif
+
+// To benchmark data residing in L3 cache.
+#define L3 (8192 KB)
+// To benchmark data residing in main memory.
+#define MEMORY (12 GB)
+#endif
+
+#define SINGLE_ARG(...) __VA_ARGS__
+
+#define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->RangeMultiplier(2)\
+                                ->Range(L1, L2)
+
+#define COMPLEXITY_BENCHMARK(x, CACHE_TYPE) BENCHMARK(x)->RangeMultiplier(2)\
+                                ->Range(L1, CACHE_TYPE)->Complexity()
+
+#define COMPLEXITY_BENCHMARK_GEN(x, y, CACHE_TYPE) BENCHMARK_TEMPLATE(x, y)\
+                                ->RangeMultiplier(2)->Range(L1, CACHE_TYPE)\
+                                ->Complexity()
+#endif // TEST_CONFIGS_H
+
+constexpr int MSize = L2;
+
+#if defined(__clang__)
+#define COMPILER_CLANG
+#elif defined(__GNUC__)
+#define COMPILER_GCC
+#elif defined(_MSC_VER)
+#define COMPILER_MSVC
+#endif
+
+#if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
+#define ATTR_NOINLINE __attribute__((noinline))
+#elif defined(COMPILER_MSVC)
+#define ATTR_NOINLINE __declspec(noinline)
+#else
+#define ATTR_NOINLINE
+#endif
Index: include/rng_utils.h
===================================================================
--- /dev/null
+++ include/rng_utils.h
@@ -0,0 +1,47 @@
+#ifndef RNG_UTILS_H
+#define RNG_UTILS_H
+#include<random>
+
+template<typename T> struct uniform_distribution {
+  typedef void type; // error
+};
+
+template<> struct uniform_distribution<char> {
+   typedef std::uniform_int_distribution<int> type;
+};
+
+template<> struct uniform_distribution<int> {
+   typedef std::uniform_int_distribution<int> type;
+};
+
+template<> struct uniform_distribution<unsigned> {
+   typedef std::uniform_int_distribution<unsigned> type;
+};
+
+template<> struct uniform_distribution<float> {
+   typedef std::uniform_real_distribution<float> type;
+};
+
+template<> struct uniform_distribution<double> {
+   typedef std::uniform_real_distribution<double> type;
+};
+
+template<class T> using gen = typename uniform_distribution<T>::type;
+
+class random_device {
+public:
+  template<typename T>
+  T get_rand(T min, T max) {
+    std::mt19937 e(rd()); // seed the generator
+    gen<T> d(min, max); // define the range
+    return d(e);
+  }
+
+private:
+  // TODO: Fix the seed to a constant.
+  std::random_device rd; // obtain a random number from hardware
+};
+
+
+#endif // RNG_UTILS_H
+
Index: include/algorithm
===================================================================
--- include/algorithm
+++ include/algorithm
@@ -642,6 +642,7 @@
 #include <utility> // needed to provide swap_ranges.
 #include <memory>
 #include <iterator>
+#include <cmath>
 #include <cstddef>
 
 #if defined(__IBMCPP__)
@@ -3994,7 +3995,14 @@
 
 template <class _Compare, class _RandomAccessIterator>
 void
-__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+__partial_sort(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator,
+            _Compare);
+
+// Using introsort algorithm for sorting
+template <class _Compare, class _RandomAccessIterator>
+void
+__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, 
+           typename iterator_traits<_RandomAccessIterator>::difference_type __depth_limit)
 {
     // _Compare is known to be a reference type
     typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
@@ -4029,6 +4037,12 @@
             _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp);
             return;
         }
+        if (__depth_limit == 0)
+        {
+            __partial_sort<_Compare>(__first,__last,__last,__comp);
+            return;
+        }
+
         // __len > 5
         _RandomAccessIterator __m = __first;
         _RandomAccessIterator __lm1 = __last;
@@ -4172,19 +4186,33 @@
         // sort smaller range with recursive call and larger with tail recursion elimination
         if (__i - __first < __last - __i)
         {
-            _VSTD::__sort<_Compare>(__first, __i, __comp);
-            // _VSTD::__sort<_Compare>(__i+1, __last, __comp);
+            _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit);
+            // _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit);
             __first = ++__i;
         }
         else
         {
-            _VSTD::__sort<_Compare>(__i+1, __last, __comp);
-            // _VSTD::__sort<_Compare>(__first, __i, __comp);
+            _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit);
+            // _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit);
             __last = __i;
         }
+        --__depth_limit;
     }
 }
 
+template <class _Compare, class _RandomAccessIterator>
+void
+__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+
+  // Threshold(or depth limit) of 2*log2(size) gives the best performance for worst case
+  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+  const difference_type __dp = __last - __first;
+  difference_type __depth_limit = __last == __first ? 0 : _VSTD::log2(__dp);
+  __depth_limit *= 2;
+  __intro_sort<_Compare>(__first, __last, __comp, __depth_limit);
+}
+
 // This forwarder keeps the top call and the recursive calls using the same instantiation, forcing a reference _Compare
 template <class _RandomAccessIterator, class _Compare>
 inline _LIBCPP_INLINE_VISIBILITY
Index: benchmarks/algorithms.bench.cpp
===================================================================
--- benchmarks/algorithms.bench.cpp
+++ benchmarks/algorithms.bench.cpp
@@ -4,9 +4,25 @@
 
 #include "benchmark/benchmark_api.h"
 #include "GenerateInput.hpp"
+#include "test_configs.h"
+#include "test_utils.h"
+
+#include<algorithm>
+#include<chrono>
+#include<deque>
+#include<functional>
+#include<list>
 
 constexpr std::size_t TestNumInputs = 1024;
 
+#define START_TIMER auto start = std::chrono::high_resolution_clock::now();
+#define STOP_TIMER  auto end = std::chrono::high_resolution_clock::now();\
+                    auto elapsed_seconds =\
+                    std::chrono::duration_cast<std::chrono::duration<double>>(\
+                    end - start);\
+                    state.SetIterationTime(elapsed_seconds.count());
+
+
 template <class GenInputs>
 void BM_Sort(benchmark::State& st, GenInputs gen) {
     using ValueType = typename decltype(gen(0))::value_type;
@@ -31,6 +47,88 @@
     }
 }
 
+template<typename V>
+void BM_sort_std_common(benchmark::State& state) {
+  int N = state.range(0);
+  V v(N);
+  fill_random(v);
+  using T = typename V::value_type;
+  while (state.KeepRunning()) {
+    std::vector<T> vec(v.begin(), v.end());
+    START_TIMER
+    std::sort(vec.begin(), vec.end());
+    STOP_TIMER
+  }
+  state.SetComplexityN(N);
+}
+
+template<typename V>
+void BM_sort_std_list_with_vector(benchmark::State& state) {
+  int N = state.range(0);
+  V v(N);
+  fill_random(v);
+  using T = typename V::value_type;
+  // Copy the contents into a vector
+  while (state.KeepRunning()) {
+    std::vector<T> vec(v.begin(), v.end());
+    // Sort the vector
+    std::sort(vec.begin(), vec.end());
+    // Put the item back in the list
+    v.assign(vec.begin(), vec.end());
+  }
+  state.SetComplexityN(N);
+}
+
+// Sort (a sequence in ascending order) in ascending order.
+template<typename V>
+void BM_sort_std_ascending(benchmark::State& state) {
+  int N = state.range(0);
+  using T = typename V::value_type;
+  V v(N);
+  fill_seq(v);
+  while (state.KeepRunning()) {
+    std::vector<T> vec(v.begin(), v.end());
+    START_TIMER
+    std::sort(vec.begin(), vec.end(), std::less<T>());
+    STOP_TIMER
+  }
+  state.SetComplexityN(N);
+}
+
+// Sort (a sequence in ascending order) in descending order.
+template<typename V>
+void BM_sort_std_descending(benchmark::State& state) {
+  int N = state.range(0);
+  using T = typename V::value_type;
+  V v(N);
+  fill_seq(v);
+  while (state.KeepRunning()) {
+    std::vector<T> vec(v.begin(), v.end());
+    START_TIMER
+    std::sort(vec.begin(), vec.end(), std::greater<T>());
+    STOP_TIMER
+  }
+  state.SetComplexityN(N);
+}
+
+template<typename V>
+void BM_sort_std_worst_quick(benchmark::State& state) {
+  int N = state.range(0);
+  using T = typename V::value_type;
+  V v;
+  make_killer(N, v);
+  while (state.KeepRunning()) {
+    std::vector<T> vec(v.begin(), v.end());
+    START_TIMER
+    std::sort(vec.begin(), vec.end());
+    STOP_TIMER
+  }
+  state.SetComplexityN(N);
+}
+
+
+
+
 BENCHMARK_CAPTURE(BM_Sort, random_uint32,
     getRandomIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 
@@ -58,5 +156,17 @@
 BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
     getDuplicateStringInputs)->Arg(TestNumInputs);
 
+#define COMPLEXITY_BENCHMARK_GEN_T(T) \
+    COMPLEXITY_BENCHMARK_GEN(BM_sort_std_common, std::vector<T>, MSize);\
+    COMPLEXITY_BENCHMARK_GEN(BM_sort_std_ascending, std::vector<T>, MSize);\
+    COMPLEXITY_BENCHMARK_GEN(BM_sort_std_descending, std::vector<T>, MSize);\
+
+COMPLEXITY_BENCHMARK_GEN_T(int)
+//COMPLEXITY_BENCHMARK_GEN_T(double)
+COMPLEXITY_BENCHMARK_GEN_T(aggregate)
+
+COMPLEXITY_BENCHMARK_GEN(BM_sort_std_list_with_vector, std::list<int>, MSize);
+COMPLEXITY_BENCHMARK_GEN(BM_sort_std_list_with_vector, std::list<aggregate>, MSize);
+COMPLEXITY_BENCHMARK_GEN(BM_sort_std_worst_quick, std::vector<int>, MSize);
 
 BENCHMARK_MAIN()
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to