/*
 * ARM memcpy asm replacement.
 *
 * Copyright (C) 2009 Bluush Dev Team.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/time.h>
#include <time.h>

#include "armasm_memcpy.h"




/*
 * Describes one memcpy implementation taking part in the benchmark.
 */
typedef struct
{
	/* Copy routine; same signature as the standard memcpy(). */
	void *(*func)(void *, const void *, size_t);
	/* Label printed in front of the measured bandwidth. Entries are
	 * initialised from string literals, hence the const qualifier. */
	const char *name;
} memcpy_info_t;




/* How many times the whole read buffer is swept during one timed run. */
#define BANDWIDTH_TESTS_COUNT	3000

/* Destination buffer size in bytes
 * (presumably one 800x480 frame at 2 bytes per pixel -- TODO confirm). */
#define WBUFFER_SIZE			(2 * 800 * 480)

/* Source buffer size in bytes; also the upper bound for the block size
 * (presumably one 200x100 tile at 2 bytes per pixel -- TODO confirm). */
#define RBUFFER_SIZE			(2 * 200 * 100)

/* Size in bytes of the scratch buffer that is preloaded before each test
 * (presumably the L1 data cache size of the target -- TODO confirm). */
#define CACHE_SIZE				(32 * 1024)

/* Stride in bytes between two consecutive preloads of the scratch buffer. */
#define CACHE_LINE				32

/*
 * Statically allocated working buffers, each aligned on a 32-byte boundary.
 *
 *   rbuffer      - source of every copy; filled with random bytes by main().
 *   wbuffer      - destination of every copy.
 *   cache_buffer - scratch area that memcpy_test() preloads before timing.
 *
 * NOTE(review): the 3 spare bytes on rbuffer/wbuffer presumably leave room
 * for the 0..3 byte alignment offsets exercised by main() -- confirm.
 */
char rbuffer[RBUFFER_SIZE + 3] __attribute__ ((aligned(32)));
char wbuffer[WBUFFER_SIZE + 3] __attribute__ ((aligned(32)));
char cache_buffer[CACHE_SIZE] __attribute__ ((aligned(32)));





/*
 * Benchmark one memcpy implementation and print its throughput on stderr.
 *
 * info      - entry giving the label and the copy function to measure.
 * blocksize - number of bytes copied per call. Values larger than
 *             RBUFFER_SIZE are clamped to RBUFFER_SIZE; values below 1 are
 *             rejected with a message and nothing is measured.
 * align     - byte offset added to the destination address of every copy.
 *
 * rbuffer is treated as RBUFFER_SIZE / blocksize consecutive blocks; each
 * block is copied to the same offset (plus align) inside wbuffer. The sweep
 * is repeated BANDWIDTH_TESTS_COUNT times and timed with clock().
 */
void memcpy_test(memcpy_info_t *info, int blocksize, int align)
{
	clock_t before, after;
	int i, j, count;
	double kilo_copies, kilo_bytes, seconds;

	/* A block size of zero would divide by zero below, and a negative one
	 * would yield a negative block count: refuse both. */
	if(blocksize < 1)
	{
		fprintf(stderr, "%s() memcpy bandwidth (align=%d, size=%d): invalid block size\n",
				info->name, align, blocksize);
		return;
	}

	if(blocksize > RBUFFER_SIZE)
		blocksize = RBUFFER_SIZE;
	count = RBUFFER_SIZE / blocksize;

	/* Issue an ARM "pld" (preload data) hint for every CACHE_LINE-sized step
	 * of cache_buffer; presumably this pushes the previous run's data out of
	 * the data cache so every test starts from the same state. */
	for(i=0 ; i<CACHE_SIZE ; i+= CACHE_LINE)
	{
		__asm__ __volatile__ ("pld [%[cache_buffer], %[offset]]"::
								[cache_buffer] "r" (cache_buffer),
								[offset] "r" (i)
							);
	}

	fprintf(stderr, "%s() memcpy bandwidth (align=%d, size=%d):\n",
			info->name, align, blocksize);

	before = clock();

	for(j=0 ; j<BANDWIDTH_TESTS_COUNT ; j++)
	{
		for(i=0 ; i<count ; ++i)
			info->func(wbuffer + i*blocksize + align, rbuffer + i * blocksize, blocksize);
	}

	after = clock();

	/* (copies / 1000) * (bytes per copy / 1000) = megabytes moved. */
	kilo_copies = (double)count * BANDWIDTH_TESTS_COUNT / 1000.0;
	kilo_bytes = (double)blocksize / 1000.0;
	seconds = (double)(after - before) / (double)CLOCKS_PER_SEC;

	/* ESC[1A moves the cursor up one line and ESC[65C moves it 65 columns to
	 * the right, so the figure lands at the end of the header line above. */
	fprintf(stderr, "\x1b[1A\x1b[65C%.2fMB/s\n",
			(kilo_copies * kilo_bytes) / seconds);
}



static memcpy_info_t info[] =
{
	{ memcpy,   "libc" },	
	{ direct_armasm_memcpy,  "armasm" },
	{ direct_armasm_memcpy2, "armasm2" },
};



/*
 * Number of elements in the array `i`. The element size is taken from the
 * array itself, so the result is right for any element type. `i` must be a
 * real array, not a pointer.
 */
#define INFO_SIZE(i)	(sizeof(i) / sizeof((i)[0]))



/*
 * Entry point of the benchmark.
 *
 * Usage: prog [blocksize]
 *
 * blocksize is the number of bytes copied per call (default 100). It is
 * parsed with base auto-detection, so decimal, 0x-prefixed hexadecimal and
 * 0-prefixed octal are accepted. It must be a positive integer; values above
 * RBUFFER_SIZE are clamped to RBUFFER_SIZE.
 *
 * Fills rbuffer with pseudo-random bytes, then runs every entry of info[]
 * for destination offsets 0 through 3, leaving a blank line between offsets.
 *
 * Returns 0 on success, EXIT_FAILURE if the argument is not usable.
 */
int main(int argc, char *argv[])
{
	int i, align, blocksize = 100;
	size_t entry;

	if(argc > 1)
	{
		char *end;
		long requested = strtol(argv[1], &end, 0);

		/* Reject empty input, trailing garbage, zero and negative sizes:
		 * they would otherwise lead to a division by zero or a meaningless
		 * block count inside memcpy_test(). */
		if(end == argv[1] || *end != '\0' || requested < 1)
		{
			fprintf(stderr, "usage: %s [blocksize]\n"
					"  blocksize must be a positive integer\n", argv[0]);
			return EXIT_FAILURE;
		}

		/* Clamp before narrowing so the long -> int conversion is always in
		 * range (an overflowing input makes strtol return LONG_MAX, which
		 * ends up here as well). */
		if(requested > RBUFFER_SIZE)
			requested = RBUFFER_SIZE;
		blocksize = (int)requested;
	}

	for(i=0 ; i<RBUFFER_SIZE ; i++)
		rbuffer[i] = (char)rand();

	for(align = 0 ; align < 4 ; align++)
	{
		for(entry=0 ; entry<INFO_SIZE(info) ; entry++)
			memcpy_test(&info[entry], blocksize, align);
		fprintf(stderr, "\n");
	}
	return 0;
}

