On 01/11/2017 07:10 PM, Phil Bouchard wrote:
Moreover I will try part of the following kernel level memory manager
sooner or later:
https://github.com/tempesta-tech/tempesta
I just wanted to close the subject with the following note:
I did try the user-space version of the Tempesta Tech memory pool (see
attached) and it performs allocations at 473% of the speed of the system
operator new (on an Intel i7 2.4 GHz)!
0: 100000000000000 allocations / second (no allocation)
1: 38739974.57882868 allocations / second (system operator new)
2: 183240796.7309842 allocations / second (tfwpool)
3: 77259572.07468219 allocations / second (boost::pool_allocator)
4: 79126882.32962202 allocations / second (boost::pool_allocator)
5: 238646397.6326277 allocations / second (boost::fast_pool_allocator)
6: 254717365.6111179 allocations / second (boost::fast_pool_allocator)
2 / 1: 473.001850732047% boost
The boost::fast_pool_allocator is comparably fast, but with tfwpool there
is no need to fix the allocated type at compile time.
Thanks for your time,
-Phil
/*
* Tempesta FW pool
* Simplified and ported to user-space from
* https://github.com/natsys/tempesta/blob/master/tempesta_fw/pool.c
*
* Copyright (C) 2015 Alexander Krizhanovsky (a...@natsys-lab.com).
*
* Modified in 2017 by Phil Bouchard (philipp...@gmail.com)
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* See http://www.gnu.org/licenses/lgpl.html .
*
* Compiled with: g++-4.9 pool-benchmark.cpp -o pool-benchmark -lboost_timer -lboost_system -lpthread -O3 -std=c++14 -DBOOST_DISABLE_THREADS
*/
#include <cassert>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <limits>
#include <boost/timer.hpp>
#include <boost/pool/pool_alloc.hpp>
#include <boost/pool/object_pool.hpp>
using namespace std;
using namespace boost;
static const size_t PAGE_SIZE = 4096; // assumes 4 KiB pages (x86 default) — TODO confirm for other targets
// sizeof(TfwStr)
struct Small {
	long l[3];

	// Append this struct's tag to a label; used when printing benchmark names.
	std::string operator+(const char *str)
	{
		std::string label(str);
		return label + " (Small)";
	}
};
// size of common HTTP headers table
struct Big {
	Small s[10];

	// Append this struct's tag to a label; used when printing benchmark names.
	std::string operator+(const char *str)
	{
		std::string label(str);
		return label + " (Big)";
	}
};
// very big allocation
struct Huge {
	char c[PAGE_SIZE * 2 + 113]; // deliberately larger than two pages

	// Append this struct's tag to a label; used when printing benchmark names.
	std::string operator+(const char *str)
	{
		std::string label(str);
		return label + " (Huge)";
	}
};
static const size_t N = 20 * 1000 * 1000; // NOTE(review): unused — main() defines its own local n
/*
* ------------------------------------------------------------------------
* Tempesta FW pool
* Simplified and ported to user-space from
* https://github.com/natsys/tempesta/blob/master/tempesta_fw/pool.c
* ------------------------------------------------------------------------
*/
/**
* Emulation of Linux buddy allocator.
*/
/* Flag value is ignored by this user-space emulation. */
#define GFP_ATOMIC 0
/* Strip the in-page offset, leaving the page-aligned base address. */
#define PAGE_MASK (~(PAGE_SIZE - 1))
/*
 * Branch-prediction hints.
 * Fix: normalize the condition with !!(x) so a non-0/1 scalar still
 * matches the constant __builtin_expect() compares against (the
 * original passed x through raw).
 */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/* Order is irrelevant here: posix_memalign() made a single allocation. */
#define free_pages(p, o) free((void *)(p))
/*
 * Map an allocation size to a buddy order (2^order pages); requests of
 * 128 pages or more trip the assert. Note the boundaries are
 * conservative: exactly PAGE_SIZE bytes yields order 1, one order more
 * than the kernel's get_order() would.
 */
#define get_order(n) (assert((n) < PAGE_SIZE * 128), \
(n) < PAGE_SIZE ? 0 \
: (n) < PAGE_SIZE * 2 ? 1 \
: (n) < PAGE_SIZE * 4 ? 2 \
: (n) < PAGE_SIZE * 8 ? 3 \
: (n) < PAGE_SIZE * 16 ? 4 \
: (n) < PAGE_SIZE * 32 ? 5 \
: (n) < PAGE_SIZE * 64 ? 6 \
: 7)
/*
 * User-space stand-in for the kernel page allocator: return the
 * address of 2^order contiguous, page-aligned pages, or 0 on failure.
 * The flags argument exists only to mirror the kernel signature.
 */
static inline unsigned long
__get_free_pages(int flags __attribute__((unused)), int order)
{
	void *page;
	const size_t bytes = PAGE_SIZE << order;

	if (posix_memalign(&page, PAGE_SIZE, bytes) != 0)
		return 0;
	return (unsigned long)page;
}
/**
* Memory pool chunk descriptor.
*
* @next - pointer to next memory chunk;
* @order - order of number of pages in the chunk;
* @off - current chunk offset;
*/
struct TfwPoolChunk {
TfwPoolChunk *next; /* previously filled chunk in the pool's LIFO list */
unsigned int order; /* log2 of pages backing this chunk; only written when the chunk is retired (the current chunk's order is cached in TfwPool) */
unsigned int off; /* saved fill offset; likewise only valid once retired */
};
/**
* Memory pool descriptor.
*
* @curr - current chunk to allocate memory from;
* @order,@off - cached members of @curr;
*/
typedef struct {
TfwPoolChunk *curr; /* chunk allocations are currently served from */
unsigned int order; /* cached copy of curr's order */
unsigned int off; /* cached copy of curr's fill offset */
} TfwPool;
/* Usable bytes in the pool's current chunk (2^order pages). */
#define TFW_POOL_CHUNK_SZ(p) (PAGE_SIZE << (p)->order)
/* Page-aligned base address of a chunk descriptor. */
#define TFW_POOL_CHUNK_BASE(c) ((unsigned long)(c) & PAGE_MASK)
/* Address of the first free byte in the current chunk. */
#define TFW_POOL_CHUNK_END(p) (TFW_POOL_CHUNK_BASE((p)->curr) + (p)->off)
/* Round n up to an 8-byte boundary. */
#define TFW_POOL_ALIGN_SZ(n) (((n) + 7) & ~7UL)
/* Combined chunk-descriptor + pool-descriptor size at the head of the first chunk. */
#define TFW_POOL_HEAD_OFF (TFW_POOL_ALIGN_SZ(sizeof(TfwPool)) \
+ TFW_POOL_ALIGN_SZ(sizeof(TfwPoolChunk)))
/* Capacity of the single-page free-list cache below. */
#define TFW_POOL_PGCACHE_SZ 256
/* LIFO cache of freed order-0 pages, reused to skip malloc round-trips. */
static unsigned int pg_next;
static unsigned long pg_cache[TFW_POOL_PGCACHE_SZ];
static unsigned long
tfw_pool_alloc_pages(unsigned int order)
{
if (likely(pg_next && !order))
return pg_cache[--pg_next];
return __get_free_pages(GFP_ATOMIC, order);
}
/*
 * Return pages to the allocator: a single page goes back onto the
 * LIFO cache while there is room; multi-page chunks (or a full cache)
 * are released to the system immediately.
 */
static void
tfw_pool_free_pages(unsigned long addr, unsigned int order)
{
	const bool cacheable = !order && pg_next < TFW_POOL_PGCACHE_SZ;

	if (likely(cacheable))
		pg_cache[pg_next++] = addr;
	else
		free_pages(addr, order);
}
/*
 * Create a pool whose first chunk can serve at least @n bytes.
 * Both the chunk descriptor and the pool descriptor live at the start
 * of that first chunk, so one page allocation bootstraps everything.
 * Returns NULL on allocation failure.
 */
TfwPool *
__tfw_pool_new(size_t n)
{
TfwPool *p;
TfwPoolChunk *c;
unsigned int order;
/* Size the first chunk for the request plus both headers. */
order = get_order(TFW_POOL_ALIGN_SZ(n) + TFW_POOL_HEAD_OFF);
c = (TfwPoolChunk *)tfw_pool_alloc_pages(order);
if (unlikely(!c))
return NULL;
/* Pool descriptor sits immediately after the chunk descriptor. */
p = (TfwPool *)((char *)c + TFW_POOL_ALIGN_SZ(sizeof(*c)));
c->next = NULL;
/* c->order/c->off stay unset: the current chunk's values are cached in *p. */
p->order = order;
p->off = TFW_POOL_HEAD_OFF;
p->curr = c;
return p;
}
/*
 * Allocate @n bytes (rounded up to 8) from the pool.
 *
 * Fast path: bump p->off inside the current chunk. If the request does
 * not fit, push a fresh chunk sized for this request onto the chunk
 * list and serve from it. Returns NULL if the page allocation fails.
 */
void *
tfw_pool_alloc(TfwPool *p, size_t n)
{
void *a;
n = TFW_POOL_ALIGN_SZ(n);
if (unlikely(p->off + n > TFW_POOL_CHUNK_SZ(p))) {
TfwPoolChunk *c, *curr = p->curr;
/* New chunk must hold its own descriptor plus the request. */
unsigned int off = TFW_POOL_ALIGN_SZ(sizeof(TfwPoolChunk)) + n;
unsigned int order = get_order(off);
c = (TfwPoolChunk *)tfw_pool_alloc_pages(order);
if (!c)
return NULL;
c->next = curr;
/* Write the cached order/off back into the chunk being retired. */
curr->order = p->order;
curr->off = p->off;
p->order = order;
p->off = off;
p->curr = c;
/* Data starts right after the (8-byte aligned) chunk descriptor. */
return (void *)TFW_POOL_ALIGN_SZ((unsigned long)(c + 1));
}
a = (char *)TFW_POOL_CHUNK_END(p);
p->off += n;
return a;
}
/*
 * Give the last @n bytes back to the pool. Only the most recent
 * allocation can be undone (stack discipline); any other pointer is
 * silently ignored. When a chunk shrinks back to just its descriptor
 * it is popped from the list and its pages released.
 */
void
tfw_pool_free(TfwPool *p, void *ptr, size_t n)
{
n = TFW_POOL_ALIGN_SZ(n);
/* Stack-like usage is expected. */
if (unlikely((char *)ptr + n != (char *)TFW_POOL_CHUNK_END(p)))
return;
p->off -= n;
/* Free empty chunk which doesn't contain the pool header. */
/* (The first chunk's floor is TFW_POOL_HEAD_OFF, so it never matches here.) */
if (unlikely(p->off == TFW_POOL_ALIGN_SZ(sizeof(TfwPoolChunk)))) {
TfwPoolChunk *next = p->curr->next;
tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(p->curr), p->order);
p->curr = next;
/* next->order/off were saved when it was retired in tfw_pool_alloc(). */
p->order = next->order;
p->off = next->off;
}
}
/*
 * Release every chunk in the pool, including the one holding the pool
 * descriptor itself — @p must not be used afterwards.
 *
 * Fix: the current chunk's order lives only in p->order (a chunk's own
 * order field is written back solely when it is retired in
 * tfw_pool_alloc(), and __tfw_pool_new() never sets it), so the
 * original read an uninitialized c->order for the head chunk. Sync it
 * before walking the list.
 */
void
tfw_pool_destroy(TfwPool *p)
{
	TfwPoolChunk *c, *next;

	p->curr->order = p->order;
	for (c = p->curr; c; c = next) {
		next = c->next;
		tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(c), c->order);
	}
}
template <typename T>
class tfw_allocator
{
public:
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
template <class U>
struct rebind
{
typedef tfw_allocator<U> other;
};
pointer address (reference value) const
{
return &value;
}
const_pointer address (const_reference value) const
{
return &value;
}
tfw_allocator() throw()
{
p = __tfw_pool_new(0);
}
~tfw_allocator() throw()
{
tfw_pool_destroy(p);
}
size_type max_size () const throw()
{
return std::numeric_limits<std::size_t>::max() / sizeof(T);
}
pointer allocate (size_type num, const void* = 0)
{
return (pointer) tfw_pool_alloc(p, num * sizeof(T));
}
void construct (pointer p, const T& value)
{
new ((void*) p) T(value);
}
void destroy (pointer p)
{
p->~T();
}
void deallocate (pointer p, size_type num)
{
tfw_pool_free(p);
}
private:
TfwPool * p;
};
/*
 * Benchmark driver: time n back-to-back allocate() calls per allocator
 * and print allocations/second for each case.
 *
 * NOTE(review): nothing is deallocated, so each case measures raw
 * allocation throughput and deliberately leaks; case 0's empty loop
 * may be optimized away entirely, yielding an effectively infinite rate.
 */
int main()
{
double speed[7];
long const n = 100000000;
/* 0: baseline — loop overhead only. */
{
timer t;
for (int i = 0; i < n; ++ i)
;
speed[0] = n / t.elapsed();
}
/* 1: global operator new. */
{
timer t;
for (int i = 0; i < n; ++ i)
::operator new (sizeof(int));
speed[1] = n / t.elapsed();
}
/* 2: Tempesta pool via the tfw_allocator wrapper. */
{
tfw_allocator<int> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[2] = n / t.elapsed();
}
/* 3: boost::pool_allocator with the default growth size. */
{
pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[3] = n / t.elapsed();
}
/* 4: boost::pool_allocator with NextSize preset to n. */
{
pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex, n> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[4] = n / t.elapsed();
}
/* 5: boost::fast_pool_allocator with the default growth size. */
{
fast_pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[5] = n / t.elapsed();
}
/* 6: boost::fast_pool_allocator with NextSize preset to n. */
{
fast_pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex, n> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[6] = n / t.elapsed();
}
cout << setprecision(numeric_limits<double>::digits10 + 1);
for (int i = 0; i < 7; ++ i)
cout << i << ": " << speed[i] << " allocations / second" << endl;
/* Headline figure: tfwpool relative to operator new, in percent. */
cout << 2 << " / " << 1 << ": " << speed[2] / speed[1] * 100 << "% boost" << endl;
return 0;
}
_______________________________________________
Development mailing list
Development@qt-project.org
http://lists.qt-project.org/mailman/listinfo/development