On 01/11/2017 07:10 PM, Phil Bouchard wrote:
Moreover I will try part of the following kernel level memory manager
sooner or later:
https://github.com/tempesta-tech/tempesta
I just wanted to close the subject with the following note:
I did try the user-space version of the Tempesta Tech memory pool (see
attached) and it performs allocations at 473% of the speed of the system
operator new (on an Intel i7 2.4 GHz)!
0: 100000000000000 allocations / second (no allocation)
1: 38739974.57882868 allocations / second (system operator new)
2: 183240796.7309842 allocations / second (tfwpool)
3: 77259572.07468219 allocations / second (boost::pool_allocator)
4: 79126882.32962202 allocations / second (boost::pool_allocator)
5: 238646397.6326277 allocations / second (boost::fast_pool_allocator)
6: 254717365.6111179 allocations / second (boost::fast_pool_allocator)
2 / 1: 473.001850732047% boost
The boost::fast_pool_allocator is comparably fast, but with tfwpool there
is no need to fix the allocated type at compile time.
Thanks for your time,
-Phil
/*
* Tempesta FW pool
* Simplified and ported to user-space from
* https://github.com/natsys/tempesta/blob/master/tempesta_fw/pool.c
*
* Copyright (C) 2015 Alexander Krizhanovsky (a...@natsys-lab.com).
*
* Modified in 2017 by Phil Bouchard (philipp...@gmail.com)
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* See http://www.gnu.org/licenses/lgpl.html .
*
* Compiled with: g++-4.9 pool-benchmark.cpp -o pool-benchmark -lboost_timer -lboost_system -lpthread -O3 -std=c++14 -DBOOST_DISABLE_THREADS
*/
#include <cassert>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <limits>
#include <boost/timer.hpp>
#include <boost/pool/pool_alloc.hpp>
#include <boost/pool/object_pool.hpp>
using namespace std;
using namespace boost;
static const size_t PAGE_SIZE = 4096; // assumes 4 KiB pages (x86 default) — TODO confirm for other targets
// sizeof(TfwStr)
struct Small {
	long l[3];

	// Append this struct's tag to a label; used when printing benchmark names.
	std::string operator+(const char *str)
	{
		std::string label(str);
		return label + " (Small)";
	}
};
// size of common HTTP headers table
struct Big {
	Small s[10];

	// Append this struct's tag to a label; used when printing benchmark names.
	std::string operator+(const char *str)
	{
		std::string label(str);
		return label + " (Big)";
	}
};
// very big allocation
struct Huge {
	char c[PAGE_SIZE * 2 + 113]; // deliberately larger than two pages

	// Append this struct's tag to a label; used when printing benchmark names.
	std::string operator+(const char *str)
	{
		std::string label(str);
		return label + " (Huge)";
	}
};
static const size_t N = 20 * 1000 * 1000; // NOTE(review): unused — main() defines its own local n
/*
* ------------------------------------------------------------------------
* Tempesta FW pool
* Simplified and ported to user-space from
* https://github.com/natsys/tempesta/blob/master/tempesta_fw/pool.c
* ------------------------------------------------------------------------
*/
/**
* Emulation of Linux buddy allocator.
*/
/* Flag value is ignored by this user-space emulation. */
#define GFP_ATOMIC 0
/* Strip the in-page offset, leaving the page-aligned base address. */
#define PAGE_MASK (~(PAGE_SIZE - 1))
/*
 * Branch-prediction hints.
 * Fix: normalize the condition with !!(x) so a non-0/1 scalar still
 * matches the constant __builtin_expect() compares against (the
 * original passed x through raw).
 */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/* Order is irrelevant here: posix_memalign() made a single allocation. */
#define free_pages(p, o) free((void *)(p))
/*
 * Map an allocation size to a buddy order (2^order pages); requests of
 * 128 pages or more trip the assert. Note the boundaries are
 * conservative: exactly PAGE_SIZE bytes yields order 1, one order more
 * than the kernel's get_order() would.
 */
#define get_order(n) (assert((n) < PAGE_SIZE * 128), \
(n) < PAGE_SIZE ? 0 \
: (n) < PAGE_SIZE * 2 ? 1 \
: (n) < PAGE_SIZE * 4 ? 2 \
: (n) < PAGE_SIZE * 8 ? 3 \
: (n) < PAGE_SIZE * 16 ? 4 \
: (n) < PAGE_SIZE * 32 ? 5 \
: (n) < PAGE_SIZE * 64 ? 6 \
: 7)
/*
 * User-space stand-in for the kernel page allocator: return the
 * address of 2^order contiguous, page-aligned pages, or 0 on failure.
 * The flags argument exists only to mirror the kernel signature.
 */
static inline unsigned long
__get_free_pages(int flags __attribute__((unused)), int order)
{
	void *page;
	const size_t bytes = PAGE_SIZE << order;

	if (posix_memalign(&page, PAGE_SIZE, bytes) != 0)
		return 0;
	return (unsigned long)page;
}
/**
* Memory pool chunk descriptor.
*
* @next - pointer to next memory chunk;
* @order - order of number of pages in the chunk;
* @off - current chunk offset;
*/
struct TfwPoolChunk {
TfwPoolChunk *next; /* previously filled chunk in the pool's LIFO list */
unsigned int order; /* log2 of pages backing this chunk; only written when the chunk is retired (the current chunk's order is cached in TfwPool) */
unsigned int off; /* saved fill offset; likewise only valid once retired */
};
/**
* Memory pool descriptor.
*
* @curr - current chunk to allocate memory from;
* @order,@off - cached members of @curr;
*/
typedef struct {
TfwPoolChunk *curr; /* chunk allocations are currently served from */
unsigned int order; /* cached copy of curr's order */
unsigned int off; /* cached copy of curr's fill offset */
} TfwPool;
/* Usable bytes in the pool's current chunk (2^order pages). */
#define TFW_POOL_CHUNK_SZ(p) (PAGE_SIZE << (p)->order)
/* Page-aligned base address of a chunk descriptor. */
#define TFW_POOL_CHUNK_BASE(c) ((unsigned long)(c) & PAGE_MASK)
/* Address of the first free byte in the current chunk. */
#define TFW_POOL_CHUNK_END(p) (TFW_POOL_CHUNK_BASE((p)->curr) + (p)->off)
/* Round n up to an 8-byte boundary. */
#define TFW_POOL_ALIGN_SZ(n) (((n) + 7) & ~7UL)
/* Combined chunk-descriptor + pool-descriptor size at the head of the first chunk. */
#define TFW_POOL_HEAD_OFF (TFW_POOL_ALIGN_SZ(sizeof(TfwPool)) \
+ TFW_POOL_ALIGN_SZ(sizeof(TfwPoolChunk)))
/* Capacity of the single-page free-list cache below. */
#define TFW_POOL_PGCACHE_SZ 256
/* LIFO cache of freed order-0 pages, reused to skip malloc round-trips. */
static unsigned int pg_next;
static unsigned long pg_cache[TFW_POOL_PGCACHE_SZ];
static unsigned long
tfw_pool_alloc_pages(unsigned int order)
{
if (likely(pg_next && !order))
return pg_cache[--pg_next];
return __get_free_pages(GFP_ATOMIC, order);
}
/*
 * Return pages to the allocator: a single page goes back onto the
 * LIFO cache while there is room; multi-page chunks (or a full cache)
 * are released to the system immediately.
 */
static void
tfw_pool_free_pages(unsigned long addr, unsigned int order)
{
	const bool cacheable = !order && pg_next < TFW_POOL_PGCACHE_SZ;

	if (likely(cacheable))
		pg_cache[pg_next++] = addr;
	else
		free_pages(addr, order);
}
/*
 * Create a pool whose first chunk can serve at least @n bytes.
 * Both the chunk descriptor and the pool descriptor live at the start
 * of that first chunk, so one page allocation bootstraps everything.
 * Returns NULL on allocation failure.
 */
TfwPool *
__tfw_pool_new(size_t n)
{
TfwPool *p;
TfwPoolChunk *c;
unsigned int order;
/* Size the first chunk for the request plus both headers. */
order = get_order(TFW_POOL_ALIGN_SZ(n) + TFW_POOL_HEAD_OFF);
c = (TfwPoolChunk *)tfw_pool_alloc_pages(order);
if (unlikely(!c))
return NULL;
/* Pool descriptor sits immediately after the chunk descriptor. */
p = (TfwPool *)((char *)c + TFW_POOL_ALIGN_SZ(sizeof(*c)));
c->next = NULL;
/* c->order/c->off stay unset: the current chunk's values are cached in *p. */
p->order = order;
p->off = TFW_POOL_HEAD_OFF;
p->curr = c;
return p;
}
/*
 * Allocate @n bytes (rounded up to 8) from the pool.
 *
 * Fast path: bump p->off inside the current chunk. If the request does
 * not fit, push a fresh chunk sized for this request onto the chunk
 * list and serve from it. Returns NULL if the page allocation fails.
 */
void *
tfw_pool_alloc(TfwPool *p, size_t n)
{
void *a;
n = TFW_POOL_ALIGN_SZ(n);
if (unlikely(p->off + n > TFW_POOL_CHUNK_SZ(p))) {
TfwPoolChunk *c, *curr = p->curr;
/* New chunk must hold its own descriptor plus the request. */
unsigned int off = TFW_POOL_ALIGN_SZ(sizeof(TfwPoolChunk)) + n;
unsigned int order = get_order(off);
c = (TfwPoolChunk *)tfw_pool_alloc_pages(order);
if (!c)
return NULL;
c->next = curr;
/* Write the cached order/off back into the chunk being retired. */
curr->order = p->order;
curr->off = p->off;
p->order = order;
p->off = off;
p->curr = c;
/* Data starts right after the (8-byte aligned) chunk descriptor. */
return (void *)TFW_POOL_ALIGN_SZ((unsigned long)(c + 1));
}
a = (char *)TFW_POOL_CHUNK_END(p);
p->off += n;
return a;
}
/*
 * Give the last @n bytes back to the pool. Only the most recent
 * allocation can be undone (stack discipline); any other pointer is
 * silently ignored. When a chunk shrinks back to just its descriptor
 * it is popped from the list and its pages released.
 */
void
tfw_pool_free(TfwPool *p, void *ptr, size_t n)
{
n = TFW_POOL_ALIGN_SZ(n);
/* Stack-like usage is expected. */
if (unlikely((char *)ptr + n != (char *)TFW_POOL_CHUNK_END(p)))
return;
p->off -= n;
/* Free empty chunk which doesn't contain the pool header. */
/* (The first chunk's floor is TFW_POOL_HEAD_OFF, so it never matches here.) */
if (unlikely(p->off == TFW_POOL_ALIGN_SZ(sizeof(TfwPoolChunk)))) {
TfwPoolChunk *next = p->curr->next;
tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(p->curr), p->order);
p->curr = next;
/* next->order/off were saved when it was retired in tfw_pool_alloc(). */
p->order = next->order;
p->off = next->off;
}
}
/*
 * Release every chunk in the pool, including the one holding the pool
 * descriptor itself — @p must not be used afterwards.
 *
 * Fix: the current chunk's order lives only in p->order (a chunk's own
 * order field is written back solely when it is retired in
 * tfw_pool_alloc(), and __tfw_pool_new() never sets it), so the
 * original read an uninitialized c->order for the head chunk. Sync it
 * before walking the list.
 */
void
tfw_pool_destroy(TfwPool *p)
{
	TfwPoolChunk *c, *next;

	p->curr->order = p->order;
	for (c = p->curr; c; c = next) {
		next = c->next;
		tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(c), c->order);
	}
}
template <typename T>
class tfw_allocator
{
public:
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
template <class U>
struct rebind
{
typedef tfw_allocator<U> other;
};
pointer address (reference value) const
{
return &value;
}
const_pointer address (const_reference value) const
{
return &value;
}
tfw_allocator() throw()
{
p = __tfw_pool_new(0);
}
~tfw_allocator() throw()
{
tfw_pool_destroy(p);
}
size_type max_size () const throw()
{
return std::numeric_limits<std::size_t>::max() / sizeof(T);
}
pointer allocate (size_type num, const void* = 0)
{
return (pointer) tfw_pool_alloc(p, num * sizeof(T));
}
void construct (pointer p, const T& value)
{
new ((void*) p) T(value);
}
void destroy (pointer p)
{
p->~T();
}
void deallocate (pointer p, size_type num)
{
tfw_pool_free(p);
}
private:
TfwPool * p;
};
/*
 * Benchmark driver: time n back-to-back allocate() calls per allocator
 * and print allocations/second for each case.
 *
 * NOTE(review): nothing is deallocated, so each case measures raw
 * allocation throughput and deliberately leaks; case 0's empty loop
 * may be optimized away entirely, yielding an effectively infinite rate.
 */
int main()
{
double speed[7];
long const n = 100000000;
/* 0: baseline — loop overhead only. */
{
timer t;
for (int i = 0; i < n; ++ i)
;
speed[0] = n / t.elapsed();
}
/* 1: global operator new. */
{
timer t;
for (int i = 0; i < n; ++ i)
::operator new (sizeof(int));
speed[1] = n / t.elapsed();
}
/* 2: Tempesta pool via the tfw_allocator wrapper. */
{
tfw_allocator<int> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[2] = n / t.elapsed();
}
/* 3: boost::pool_allocator with the default growth size. */
{
pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[3] = n / t.elapsed();
}
/* 4: boost::pool_allocator with NextSize preset to n. */
{
pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex, n> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[4] = n / t.elapsed();
}
/* 5: boost::fast_pool_allocator with the default growth size. */
{
fast_pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[5] = n / t.elapsed();
}
/* 6: boost::fast_pool_allocator with NextSize preset to n. */
{
fast_pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex, n> p;
timer t;
for (int i = 0; i < n; ++ i)
p.allocate(1);
speed[6] = n / t.elapsed();
}
cout << setprecision(numeric_limits<double>::digits10 + 1);
for (int i = 0; i < 7; ++ i)
cout << i << ": " << speed[i] << " allocations / second" << endl;
/* Headline figure: tfwpool relative to operator new, in percent. */
cout << 2 << " / " << 1 << ": " << speed[2] / speed[1] * 100 << "% boost" << endl;
return 0;
}
_______________________________________________
Development mailing list
Development@qt-project.org
http://lists.qt-project.org/mailman/listinfo/development