Geoff Hutchison writes:
>
> If you can get me the source, I'll contact Sleepycat and ask about the
> results. From previous experience, they're willing to answer questions
> like this.
Here are the sources; please disregard the ones I sent previously.
Thanks for your help on this :-)
-- benchit.cc
#ifdef HAVE_CONFIG_H
#include <htconfig.h>
#endif /* HAVE_CONFIG_H */
#include <fstream.h>
// If we have this, we probably want it.
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#include <malloc.h>
#include <stdlib.h>
#include <htString.h>
#include <Database.h>
/* Benchmark settings; filled from the command line in main(). */
typedef struct {
    char *wordsfile;      /* path of the word list read by fill() */
    char *data;           /* text appended to a record on each update (try2/try3) */
    int   nwords;         /* stop reading after this many words; <= 0 means no limit */
    int   add;            /* number of update rounds applied to each key */
    int   loop;           /* number of times the selected test is repeated */
    int   ndoc;           /* number of entries stored per word */
    int   prefix;         /* non-zero: prepend "0000000000" to every key */
    int   disable_prefix; /* non-zero: set bit 0x800000 in the open mode */
    int   suffix;         /* non-zero: append "0000000000" to every key */
} params_t;
// Prints every key of a fixed words database to standard output (-k option).
static void keys();
// Opens ./test, fills it from the word list and runs the test selected by mode.
static void dobench(int mode, params_t* params);
// Incremented once per -v option on the command line.
static int verbose;
// Prints the option summary and terminates the program.
void usage();
//*****************************************************************************
// int main(int ac, char **av)
//   Parses the command line into a params_t, then runs the benchmark once
//   through dobench().  Option summary (see usage() for the user text):
//     -0..-4   select the test mode (default 1)
//     -k       dump the keys of the words database and exit
//     -w/-D    replace the words file name / the appended data string
//     -n/-a/-l/-d  numeric settings, parsed with atoi()
//     -v       raise verbosity;  -p/-s pad keys;  -P disable prefix function
//   Returns 0 when the benchmark completes.
//
int main(int ac, char **av)
{
    int c;
    extern char *optarg;
    params_t params;
    int mode = 1;

    // Defaults.  The two strings are heap copies so that -w and -D can
    // free and replace them uniformly.
    params.wordsfile = strdup("words.all");
    params.nwords = -1;
    params.add = 10000;
    params.loop = 1;
    params.ndoc = 1;
    params.data = strdup("a");
    params.suffix = params.prefix = 0;
    params.disable_prefix = 0;

    while ((c = getopt(ac, av, "v01234w:n:a:l:d:D:kpsP")) != -1)
    {
        switch (c)
        {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
            // The option character itself encodes the mode number.
            mode = c - '0';
            break;
        case 'k':
            keys();
            exit(0);
            break;
        case 'w':
            free(params.wordsfile);
            params.wordsfile = strdup(optarg);
            break;
        case 'D':
            free(params.data);
            params.data = strdup(optarg);
            break;
        case 'n':
            params.nwords = atoi(optarg);
            break;
        case 'a':
            params.add = atoi(optarg);
            break;
        case 'l':
            params.loop = atoi(optarg);
            break;
        case 'd':
            params.ndoc = atoi(optarg);
            break;
        case 'v':
            verbose++;
            break;
        case 'p':
            params.prefix = 1;
            break;
        case 's':
            params.suffix = 1;
            break;
        case 'P':
            params.disable_prefix = 1;
            break;
        case '?':
            // getopt() reports unknown options and missing arguments as '?'.
            usage();
            break;
        default:
            break;
        }
    }

    // The source had a mangled "&params" here (an "&para" entity decoded
    // into a pilcrow); dobench() takes the address of the settings.
    dobench(mode, &params);

    free(params.wordsfile);
    free(params.data);
    return 0;
}
//*****************************************************************************
// void usage()
//   Display program usage information on standard output, then exit with
//   status 0.  Never returns.
//
void usage()
{
    cout << "usage: benchit [-v][-1]\n";
    cout << "Options:\n";
    // The original -v text stopped in the middle of the sentence.
    cout << "\t-v\tVerbose mode. This increases the verbosity of the program.\n\n";
    cout << "\t-0\tOnly fill with word/documentid.\n\n";
    cout << "\t-1\tNo dups, get, grow, put.\n\n";
    cout << "\t-2\tDups, get, delete, grow, put.\n\n";
    cout << "\t-3\tDups, put.\n\n";
    // -k is accepted by main() but was missing from the help text.
    cout << "\t-k\tPrint all keys of /opt/www/var/htdig/db.words.db and exit.\n\n";
    cout << "\t-w file\tRead words list from file (default words.all).\n\n";
    cout << "\t-n limit\tRead at most <limit> words (default no limit).\n\n";
    cout << "\t-a loop\tLoop <loop> times when updating a word (default 10 000).\n\n";
    cout << "\t-l loop\tDo the whole test <loop> times (default 1).\n\n";
    cout << "\t-d n\tDo as if each word appear in <n> documents (default 1).\n\n";
    cout << "\t-D data\tdata appended to record is <data> (default a).\n\n";
    cout << "\t-p\tadd prefix 0000000000 to key (default no prefix).\n\n";
    cout << "\t-P\tdisable prefix compression (default enabled).\n\n";
    cout << "\t-s\tadd suffix 0000000000 to key (default no suffix).\n\n";
    exit(0);
}
//
// Loads the word list into the database.
//
// Each line of params->wordsfile is one word.  For every word, params->ndoc
// records are stored; the key is
//     [pad] word '\001' <index> [pad]
// where pad is "0000000000" (present when params->prefix / params->suffix
// is set) and index runs from 0 to ndoc - 1.  The record value is the word.
// Reading stops at end of file, on a read error, or once params->nwords
// words have been read (when nwords > 0).  A summary is printed to cout.
//
static void fill(Database* db, params_t* params)
{
    ifstream in(params->wordsfile);
    char buffer[50000];
    char tmp[1024];
    int words_count = 0;
    const char* pad = "0000000000";

    cout << "Reading from " << params->wordsfile << " ... ";

    // Testing the stream after getline() ends the loop on end of file, on an
    // unopened file and on an over-long line.  The previous bad()/eof() test
    // looped forever in the last two cases and dropped a final line that had
    // no trailing newline.
    while (in.getline(buffer, sizeof(buffer))) {
        String line;
        line << buffer;
        line.chop("\r\n");
        char *current = line.get();

        for (int j = 0; j < params->ndoc; j++) {
            // Bounded formatting: a line may be far longer than tmp.
            int n = snprintf(tmp, sizeof(tmp), "%s%s\001%d%s",
                             (params->prefix ? pad : ""), current, j,
                             (params->suffix ? pad : ""));
            if (n < 0 || (size_t)n >= sizeof(tmp)) {
                cerr << "fill: key too long, skipped: " << current << "\n";
                continue;
            }
            db->Put(tmp, line);
        }

        words_count++;
        if (params->nwords > 0 && params->nwords <= words_count)
            break;
    }

    cout << "pushed " << words_count << " words (" << params->ndoc << " documents) \n";
}
//
// Test 1: get, grow, put.
//
// For each probe word, the key returned by Start_Seq()/Get_Next_Seq() is
// used as the working key (presumably the first stored key at or after the
// word -- confirm against Database).  Each working key is then read,
// extended by one "a" and written back, params->add times; the record
// length is printed on the last round.  Finally every working key's record
// is reset to the key itself.  Any database failure aborts the program.
//
static void try1(Database* db, params_t* params)
{
    char* keys[] = {
        "billions",
        "millions",
        0
    };

    for (char** key = keys; *key; key++) {
        db->Start_Seq(*key);
        // A missing key or a failed copy would otherwise crash in strdup()
        // or silently cut the null-terminated key list short.
        const char* found = db->Get_Next_Seq();
        if (!found)
            abort();
        *key = strdup(found);
        if (!*key)
            abort();
        cout << "key: " << *key << "\n";
    }

    for (int i = 0; i < params->add; i++) {
        for (char** key = keys; *key; key++) {
            String data;
            if (db->Get(*key, data) != OK)
                abort();
            data += "a";
            if (db->Put(*key, data) != OK)
                abort();
            if (i >= params->add - 1)
                cout << "try1: " << *key << " is " << data.length() << " bytes long\n";
        }
    }

    for (char** key = keys; *key; key++) {
        if (db->Put(*key, *key) != OK)
            abort();
        // Release the copy made above; it used to leak on every call.
        free(*key);
    }
}
//
// Test 2: get, delete, grow, put.
//
// Working keys are chosen as in test 1: the key returned by
// Start_Seq()/Get_Next_Seq() for each probe word.  Each working key is then
// read, deleted, extended by params->data followed by the round number and
// written back, params->add times; the record length is printed on the last
// round.  Finally every working key's record is reset to the key itself.
// Any database failure aborts the program.
//
static void try2(Database* db, params_t* params)
{
    char* keys[] = {
        "billions",
        "millions",
        0
    };

    for (char** key = keys; *key; key++) {
        db->Start_Seq(*key);
        // A missing key or a failed copy would otherwise crash in strdup()
        // or silently cut the null-terminated key list short.
        const char* found = db->Get_Next_Seq();
        if (!found)
            abort();
        *key = strdup(found);
        if (!*key)
            abort();
        cout << "key: " << *key << "\n";
    }

    for (int i = 0; i < params->add; i++) {
        for (char** key = keys; *key; key++) {
            String data;
            if (db->Get(*key, data) != OK)
                abort();
            if (db->Delete(*key) != OK)
                abort();
            data += params->data;
            data += String(i);
            if (db->Put(*key, data) != OK)
                abort();
            if (i >= params->add - 1)
                cout << "try2: " << *key << " is " << data.length() << " bytes long\n";
        }
    }

    for (char** key = keys; *key; key++) {
        if (db->Put(*key, *key) != OK)
            abort();
        // Release the copy made above; it used to leak on every call.
        free(*key);
    }
}
//
// Test 3: put only.
//
// For each of the two fixed keys, stores params->data followed by the round
// number, params->add times, without reading or deleting first.  The length
// of the stored record is printed on the last round.  A failed Put() aborts
// the program.
//
static void try3(Database* db, params_t* params)
{
    char* keys[] = {
        "billions",
        "millions",
        0
    };

    for (int i = 0; i < params->add; i++) {
        for (char** key = keys; *key; key++) {
            String data(params->data);
            data += String(i);
            if (db->Put(*key, data) != OK)
                abort();
            // The label used to read "try2: ", copied from the previous test.
            if (i >= params->add - 1)
                cout << "try3: " << *key << " is " << data.length() << " bytes long\n";
        }
    }
}
static void dobench(int mode, params_t* params)
{
Database* db = Database::getDatabaseInstance(DB_BTREE);
int flags = 0644;
if(mode == 1)
flags |= 0x400000;
if(params->disable_prefix)
flags |= 0x800000;
if(db->OpenReadWrite("./test", flags) != OK)
abort();
fill(db, params);
for(int i = 0; i < params->loop; i++) {
switch(mode) {
case 1:
try1(db, params);
break;
case 2:
try2(db, params);
break;
case 3:
try3(db, params);
break;
}
}
if(db->Close() != OK)
abort();
delete db;
}
static void keys()
{
Database* db = Database::getDatabaseInstance(DB_BTREE);
if(db->OpenRead("/opt/www/var/htdig/db.words.db") != OK)
abort();
db->Start_Get();
char* key;
while((key = db->Get_Next())) {
cout << key << "\n";
}
if(db->Close() != OK)
abort();
delete db;
}
-- Makefile.am
# Shared build settings; HTLIBS used below is presumably defined there -- confirm.
include $(top_srcdir)/Makefile.config
# -p is added to the compile flags and (below) the link flags; presumably
# this enables the profiling data read by the gprof step in dobench -- confirm.
LOCAL_DEFINES = -p
# benchit is built in place and not installed.
noinst_PROGRAMS = benchit
benchit_SOURCES = benchit.cc
# Relink benchit whenever one of the project libraries changes.
benchit_DEPENDENCIES = $(HTLIBS)
benchit_LDFLAGS = -p
benchit_LDADD = $(HTLIBS)
#
# Run bench1 once to make sure everything that
# will be cached is cached.
# Then run the dobench target for modes 1, 2 and 3 in turn.
#
bench: benchit
./benchit -1 > /dev/null 2>&1
for i in 1 2 3 ; do \
$(MAKE) BENCH=$$i dobench ; \
done
# Defaults for dobench: test mode and repeat count passed to benchit.
# The bench target overrides BENCH for each run.
BENCH=1
LOOP=20
#
# Run one benchmark mode from an empty ./test database, then print the
# profile, dump and reload the database, and print its statistics and size.
# Everything, including stderr, goes to bench$(BENCH).out.
# NOTE(review): "time -v" needs an external time command that accepts -v
# (a shell's built-in time may not) -- confirm on the target system.
#
dobench: benchit
( rm -f test ; \
time -v ./benchit -$(BENCH) -l $(LOOP) ; \
gprof .libs/benchit ; \
db_dump test > test.dump ; \
db_load test < test.dump ; \
db_stat -d test ; \
ls -l test ) > bench$(BENCH).out 2>&1
# Number of entries stored per word (benchit -d) in the prefix measurements.
NDOC = 10
# Run the prefix measurements twice: first with benchit -P (no prefix
# function installed in the database), then with the default settings.
doprefix:
#
# Without prefix function $(NDOC) documents
#
$(MAKE) DISABLE_PREFIX=-P prefix
#
# With prefix function $(NDOC) documents
#
$(MAKE) prefix
#
# Measure the database size per stored entry for three key layouts.
# For each layout, ./test is removed, benchit -0 (fill only) is run with
# $(NDOC) entries per word, and the size reported by stat is divided by
# (number of lines in words.all * $(NDOC)).
# The labels refer to the key padding options of benchit:
#   "no prefix" = plain keys, "prefix" = -p, "suffix" = -s.
# $(DISABLE_PREFIX) is empty or -P, as set by the doprefix target.
#
prefix: benchit
@rm -f test ; \
nwords=`wc -l words.all | perl -n -e 'print if(s/\s*(\d+).*/\1/)'` ; \
time ./benchit -0 -d $(NDOC) $(DISABLE_PREFIX) ; \
s1=`stat test | perl -n -e 'print if(s/.*size:\s+(\d+).*/\1/i)'` ; \
bw=`expr $$s1 / \( $$nwords \* $(NDOC) \)` ; \
echo "no prefix : $$bw bytes per word" ; \
rm -f test ; \
time ./benchit -0 -d $(NDOC) -p $(DISABLE_PREFIX) ; \
s1=`stat test | perl -n -e 'print if(s/.*size:\s+(\d+).*/\1/i)'` ; \
bw=`expr $$s1 / \( $$nwords \* $(NDOC) \)` ; \
echo "prefix : $$bw bytes per word" ; \
rm -f test ; \
time ./benchit -0 -d $(NDOC) -s $(DISABLE_PREFIX) ; \
s1=`stat test | perl -n -e 'print if(s/.*size:\s+(\d+).*/\1/i)'` ; \
bw=`expr $$s1 / \( $$nwords \* $(NDOC) \)` ; \
echo "suffix : $$bw bytes per word"
#
# Build a word list from the compressed info pages under /usr/info:
# every alphabetic run of 3 to 31 letters, lower-cased, one per line,
# sorted with duplicates removed, written to the file "words".
# The perl script stays on one line because a backslash-newline inside
# the single quotes would be passed through to perl.
#
words:
	find /usr/info -name '*.gz' -print | xargs zcat | \
	perl -n -e 'print join("\n", map { lc } grep(length() > 2 && length() < 32, m/[a-z]+/ig)) . "\n"' | \
	grep -v '^$$' | \
	sort -u > words
# The special target needs its leading dot; "PHONY:" only declared an
# ordinary target named PHONY, so an existing "words" file blocked rebuilds.
.PHONY: words
-- And the small hack to DB2_db.cc that makes it work
/*
 * bam_defpfx --
 *    Default prefix routine (file-local; apparently taken from Berkeley
 *    DB's __bam_defpfx).
 *
 * Returns how many leading bytes of b are needed to tell it apart from a:
 * one more than the length of their common prefix when they differ inside
 * the shorter key, otherwise a->size + 1 if a is shorter than b, else
 * a->size.
 */
static size_t
bam_defpfx(const DBT *a, const DBT *b)
{
    size_t cnt, len;
    const u_int8_t *p1, *p2;

    cnt = 1;
    len = a->size > b->size ? b->size : a->size;
    /* Explicit casts: C++ has no implicit conversion from void *. */
    for (p1 = (const u_int8_t *)a->data, p2 = (const u_int8_t *)b->data;
         len--; ++p1, ++p2, ++cnt)
        if (*p1 != *p2)
            return (cnt);

    /*
     * We know that a->size must be <= b->size, or they wouldn't be
     * in this order.
     */
    return (a->size < b->size ? a->size + 1 : a->size);
}
/*
 * bam_defcmp --
 *    Default comparison routine (file-local; apparently taken from
 *    Berkeley DB's __bam_defcmp).
 *
 * Compares the two keys byte by byte as unsigned values; when one key is
 * a prefix of the other, the shorter key sorts first.
 */
static int
bam_defcmp(const DBT *a, const DBT *b)
{
    size_t len;
    const u_int8_t *p1, *p2;

    /*
     * Returns:
     *    < 0 if a is < b
     *    = 0 if a is = b
     *    > 0 if a is > b
     *
     * XXX
     * If a size_t doesn't fit into a long, or if the difference between
     * any two characters doesn't fit into an int, this routine can lose.
     * What we need is a signed integral type that's guaranteed to be at
     * least as large as a size_t, and there is no such thing.
     */
    len = a->size > b->size ? b->size : a->size;
    /* Explicit casts: C++ has no implicit conversion from void *. */
    for (p1 = (const u_int8_t *)a->data, p2 = (const u_int8_t *)b->data;
         len--; ++p1, ++p2)
        if (*p1 != *p2)
            return ((long)*p1 - (long)*p2);
    return ((long)a->size - (long)b->size);
}
int
DB2_db::OpenReadWrite(char *filename, int mode)
{
//
// Initialize the database environment.
//
dbenv = db_init((char *)NULL);
memset(&dbinfo, 0, sizeof(dbinfo));
// dbinfo.db_cachesize = CACHE_SIZE_IN_KB * 1024; // Cachesize: 64K.
// dbinfo.db_pagesize = 1024; // Page size: 1K.
// Hack for benchmarking. Turn off duplicates if bit 0x400000 is on. */
if(!(mode & 0x800000)) dbinfo.bt_prefix = bam_defpfx;
dbinfo.bt_compare = bam_defcmp;
if(!(mode & 0x400000)) dbinfo.flags = DB_DUP;
..
------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED] containing the single word "unsubscribe" in
the SUBJECT of the message.