On 01/07/2017 12:58 PM, Phil Bouchard wrote:
On 01/07/2017 12:11 PM, Phil Bouchard wrote:
On 01/06/2017 10:37 PM, Phil Bouchard wrote:
On 01/06/2017 08:05 PM, Phil Bouchard wrote:
On 01/06/2017 07:17 PM, Phil Bouchard wrote:
Just to conclude, I did try the attached benchmark and I get the
following on an x86_64 @ 2.40 GHz:
0: 61331143.40263957 allocations / second
1: 63644162.93924019 allocations / second
2: 177628727.5388474 allocations / second
3: 179850939.5413082 allocations / second
1 / 0: 103.7713621632905% boost
2 / 1: 101.2510431354494% boost
So the fast_pool_allocator is already pretty fast and I can only get a
1% speed boost by allocating big memory blocks. So it doesn't look like
there is any way to make the fast_pool_allocator any faster.
Correction: I get an 11% speed boost with the -O3 flag:
0: 60387275.67636769 allocations / second
1: 63133704.55951615 allocations / second
2: 169169529.8609596 allocations / second
3: 188535531.4062488 allocations / second
1 / 0: 104.5480258090584% boost
2 / 1: 111.447688931456% boost
So I am not sure if this is worth the trouble.
Did you know the optimized boost::fast_pool_allocator is about 3 times
as fast as the regular one (305% of its throughput)?
0: 61407861.18875794 allocations / second
1: 62565419.96725326 allocations / second
2: 175311265.1512761 allocations / second
3: 187319938.708916 allocations / second
1 / 0: 101.8850335381934% boost
2 / 1: 106.8499155187076% boost
3 / 0: 305.0422781101666% boost
Is Qt using a similar pool allocator?
This means the optimized boost::fast_pool_allocator is about 4.7 times as
fast as the system "operator new" (470% of its throughput):
0: 37334468.30116969 allocations / second (operator new)
1: 58554420.18534816 allocations / second (boost::pool_allocator)
2: 60072218.82146717 allocations / second (boost::pool_allocator)
3: 164895686.9884111 allocations / second (boost::fast_pool_allocator)
4: 175697519.1510296 allocations / second (boost::fast_pool_allocator)
2 / 1: 102.5921162421463% boost
4 / 3: 106.550706304027% boost
4 / 0: 470.6040480708413% boost
I just tried the "jemalloc" library and I get a 2576228000% speed boost
compared to operator new:
0: 38816440.15979952 allocations / second
1: 62188319.91413037 allocations / second
2: 63819182.38692677 allocations / second
3: 174727730.5139267 allocations / second
4: 186025745.9632413 allocations / second
5: 1000000000000000 allocations / second
5 / 0: 2576228000% boost
5 / 1: 1608019000% boost
Sorry, false alarm. I think I was misusing jemalloc. Now
boost::fast_pool_allocator is the winner with a 332% speed boost
compared to the normal boost::pool_allocator:
0: inf allocations / second (no allocation)
1: 54435751.65974607 allocations / second (jemalloc)
2: 77056206.33802707 allocations / second (boost::pool_allocator)
3: 80152739.05955189 allocations / second (boost::pool_allocator)
4: 236036104.0824805 allocations / second (boost::fast_pool_allocator)
5: 256307733.3169296 allocations / second (boost::fast_pool_allocator)
5 / 2: 332.6243861429787% boost
// Configured with: ./configure --with-jemalloc-prefix=je_
// Compiled with: g++-4.9 pool-benchmark.cpp -o pool-benchmark -lboost_timer -lboost_system -ljemalloc -O3 -std=c++14 -DBOOST_DISABLE_THREADS
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <limits>
#include <new>
#include <boost/timer.hpp>
#include <boost/pool/pool_alloc.hpp>
#include "jemalloc/jemalloc.h"
using namespace std;
using namespace boost;
/// Minimal C++03-style allocator whose storage comes from jemalloc.
///
/// Memory is obtained with je_malloc() and released with je_free(). The
/// allocator holds no state, so every instance is interchangeable with every
/// other one, including instances rebound to a different value type.
///
/// @tparam T  element type the allocator hands out storage for.
template <typename T>
class je_allocator
{
public:
    typedef T value_type;
    typedef T* pointer;
    typedef const T* const_pointer;
    typedef T& reference;
    typedef const T& const_reference;
    typedef std::size_t size_type;
    typedef std::ptrdiff_t difference_type;

    /// Yields the same allocator template bound to another element type.
    template <class U>
    struct rebind
    {
        typedef je_allocator<U> other;
    };

    /// @return the address of @p value.
    pointer address (reference value) const
    {
        return &value;
    }

    /// @return the address of the const object @p value.
    const_pointer address (const_reference value) const
    {
        return &value;
    }

    // Construction, copying, converting-copy and destruction are all no-ops
    // because there are no data members.
    je_allocator() throw()
    {
    }

    je_allocator(const je_allocator&) throw()
    {
    }

    template <class U>
    je_allocator (const je_allocator<U>&) throw()
    {
    }

    ~je_allocator() throw()
    {
    }

    /// @return the largest element count whose byte size still fits in
    ///         std::size_t.
    size_type max_size () const throw()
    {
        return std::numeric_limits<std::size_t>::max() / sizeof(T);
    }

    /// Allocates uninitialised storage for @p num objects of type T.
    ///
    /// @param num  number of elements (not bytes) to reserve room for.
    /// @return pointer to the start of the storage.
    /// @throws std::bad_alloc if @p num exceeds max_size() or jemalloc
    ///         cannot satisfy the request.
    pointer allocate (size_type num, const void* = 0)
    {
        // Reject counts whose byte size would wrap around in the
        // multiplication below.
        if (num > max_size())
            throw std::bad_alloc();

        // The request is expressed in elements; je_malloc() wants bytes.
        void* const raw = je_malloc(num * sizeof(T));

        // A null result for a zero-element request is not treated as failure.
        if (raw == 0 && num != 0)
            throw std::bad_alloc();

        return static_cast<pointer>(raw);
    }

    /// Copy-constructs a T from @p value in the storage at @p p.
    void construct (pointer p, const T& value)
    {
        new (static_cast<void*>(p)) T(value);
    }

    /// Runs the destructor of the object at @p p without releasing storage.
    void destroy (pointer p)
    {
        p->~T();
    }

    /// Returns storage previously obtained from allocate() to jemalloc.
    /// The element count is not needed by je_free() and is ignored.
    void deallocate (pointer p, size_type /* num */)
    {
        je_free(p);
    }
};
/// Equality for je_allocator: always true, whatever the two value types.
template <class Lhs, class Rhs>
bool operator== (const je_allocator<Lhs>& /* lhs */,
                 const je_allocator<Rhs>& /* rhs */) throw()
{
    return true;
}
/// Inequality for je_allocator: always false, whatever the two value types.
template <class Lhs, class Rhs>
bool operator!= (const je_allocator<Lhs>& /* lhs */,
                 const je_allocator<Rhs>& /* rhs */) throw()
{
    return false;
}
int main()
{
double speed[6];
static long const n = 100000000;
{
timer t;
for (int i = 0; i < n; ++ i)
;
speed[0] = n / t.elapsed();
}
{
je_allocator<int> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[1] = n / t.elapsed();
}
{
pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[2] = n / t.elapsed();
}
{
pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex, n> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[3] = n / t.elapsed();
}
{
fast_pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[4] = n / t.elapsed();
}
{
fast_pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex, n> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[5] = n / t.elapsed();
}
cout << setprecision(numeric_limits<double>::digits10 + 1);
for (int i = 0; i < 6; ++ i)
cout << i << ": " << speed[i] << " allocations / second" << endl;
cout << 5 << " / " << 2 << ": " << speed[5] / speed[2] * 100 << "% boost" << endl;
return 0;
}
_______________________________________________
Development mailing list
[email protected]
http://lists.qt-project.org/mailman/listinfo/development