DB=$1
ROWS=$2

# only for the data loading part, not queries
WORK_MEM='1GB'

function log {
	echo `date +%s` [`date +'%Y-%m-%d %H:%M:%S'`] $1
}

function create_tables {

	psql $DB > /dev/null  <<EOF
-- tables with master data (running generate_series only once)
CREATE TABLE data_int (a INT);
INSERT INTO  data_int SELECT i FROM generate_series(1, $ROWS) s(i);

CREATE TABLE data_float (a INT);
INSERT INTO  data_float SELECT 100000 * random() FROM generate_series(1, $ROWS) s(i);

-- tables used for the actual testing
CREATE TABLE int_test         (a INT);
CREATE TABLE int_test_padding (a INT, b TEXT);

CREATE TABLE text_test         (a TEXT);
CREATE TABLE text_test_padding (a TEXT, b TEXT);

CREATE TABLE numeric_test         (a NUMERIC);
CREATE TABLE numeric_test_padding (a NUMERIC, b TEXT);
EOF

}

function truncate_tables {

	log "truncating tables"

	psql $DB > /dev/null  <<EOF
TRUNCATE TABLE int_test;
TRUNCATE TABLE int_test_padding;

TRUNCATE TABLE text_test;
TRUNCATE TABLE text_test_padding;

TRUNCATE TABLE numeric_test;
TRUNCATE TABLE numeric_test_padding;
EOF

}

function vacuum_analyze {

	log "analyzing"

	psql $DB > /dev/null  <<EOF
VACUUM ANALYZE;
CHECKPOINT;
EOF

}

# unique data

function load_unique_sorted {

	truncate_tables

	log "loading unique tables / sorted"

	psql $DB > /dev/null  <<EOF
SET work_mem = '$WORK_MEM';

INSERT INTO int_test SELECT a FROM data_int;
INSERT INTO int_test_padding SELECT a, repeat(md5(a::text),10) FROM data_int;

INSERT INTO text_test SELECT md5(a::text) FROM data_int ORDER BY md5(a::text);
INSERT INTO text_test_padding SELECT md5(a::text), repeat(md5((a+1)::text),10) FROM data_int ORDER BY md5(a::text);

INSERT INTO numeric_test SELECT a FROM data_float ORDER BY a;
INSERT INTO numeric_test_padding SELECT a, repeat(md5(a::text),10) FROM data_float ORDER BY a;
EOF

	vacuum_analyze

}

function load_unique_random {

	truncate_tables

	log "loading unique tables / random"

	psql $DB > /dev/null  <<EOF
SET work_mem = '$WORK_MEM';

-- this needs randomization
INSERT INTO int_test SELECT a FROM data_int ORDER BY random();
INSERT INTO int_test_padding SELECT a, repeat(md5(a::text),10) FROM data_int ORDER BY random();

-- these already are random
INSERT INTO text_test SELECT md5(a::text) FROM data_int;
INSERT INTO text_test_padding SELECT md5(a::text), repeat(md5((a+1)::text),10) FROM data_int;

INSERT INTO numeric_test SELECT a FROM data_float;
INSERT INTO numeric_test_padding SELECT a, repeat(md5(a::text),10) FROM data_float;
EOF

	vacuum_analyze

}

function load_unique_almost_asc {

	truncate_tables

	log "loading unique tables / almost sorted"

	psql $DB > /dev/null  <<EOF
SET work_mem = '$WORK_MEM';

INSERT INTO int_test SELECT a FROM (
    SELECT a, rank() OVER (ORDER BY a) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO int_test_padding SELECT a, repeat(b,10) FROM (
    SELECT a, md5(a::text) AS b, rank() OVER (ORDER BY a) AS r FROM data_int
) foo ORDER BY r + (100 * random());
 
INSERT INTO text_test SELECT a FROM (
    SELECT md5(a::text) AS a, rank() OVER (ORDER BY md5(a::text)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO text_test_padding SELECT a, repeat(b,10) FROM (
    SELECT md5(a::text) AS a, md5((a+1)::text) AS b, rank() OVER (ORDER BY md5(a::text)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO numeric_test SELECT a FROM (
    SELECT a, rank() OVER (ORDER BY a) AS r FROM data_float
) foo ORDER BY r + (100 * random());

INSERT INTO numeric_test_padding SELECT a, repeat(b,10) FROM (
    SELECT a, md5(a::text) AS b, rank() OVER (ORDER BY a) AS r FROM data_float
) foo ORDER BY r + (100 * random());
EOF

	vacuum_analyze

}

# high cardinality (10% of table size)

function load_high_cardinality_sorted {

	truncate_tables

	log "loading high cardinality tables / sorted"

	psql $DB > /dev/null  <<EOF
SET work_mem = '$WORK_MEM';

INSERT INTO int_test SELECT a/10 FROM data_int;
INSERT INTO int_test_padding SELECT a/10, repeat(md5(a::text),10) FROM data_int;

INSERT INTO text_test SELECT md5((a/10)::text) FROM data_int ORDER BY md5((a/10)::text);
INSERT INTO text_test_padding SELECT md5((a/10)::text), repeat(md5((a+1)::text),10) FROM data_int ORDER BY md5(a::text);

INSERT INTO numeric_test SELECT hashint4(a/10)::numeric/100 FROM data_int ORDER BY a;
INSERT INTO numeric_test_padding SELECT hashint4(a/10)::numeric/100, repeat(md5(a::text),10) FROM data_int ORDER BY a;
EOF

	vacuum_analyze

}

function load_high_cardinality_random {

	truncate_tables

	log "loading high cardinality tables / random"

	psql $DB > /dev/null  <<EOF

-- this needs randomization
INSERT INTO int_test SELECT a/10 FROM data_int ORDER BY random();
INSERT INTO int_test_padding SELECT a/10, md5(a::text) FROM data_int ORDER BY random();

-- these already are random
INSERT INTO text_test SELECT md5((a/10)::text) FROM data_int;
INSERT INTO text_test_padding SELECT md5((a/10)::text), md5((a+1)::text) FROM data_int;

INSERT INTO numeric_test SELECT hashint4(a/10)::numeric/100 FROM data_int;
INSERT INTO numeric_test_padding SELECT hashint4(a/10)::numeric/100, md5(a::text) FROM data_int;
EOF

	vacuum_analyze

}

function load_high_cardinality_almost_asc {

	truncate_tables

	log "loading high cardinality tables / almost sorted"

	psql $DB > /dev/null  <<EOF
SET work_mem = '$WORK_MEM';

INSERT INTO int_test SELECT a FROM (
    SELECT (a/10) AS a, rank() OVER (ORDER BY (a/10)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO int_test_padding SELECT a, repeat(b,10) FROM (
    SELECT (a/10) AS a, md5(a::text) AS b, rank() OVER (ORDER BY (a/10)) AS r FROM data_int
) foo ORDER BY r + (100 * random());
 
INSERT INTO text_test SELECT a FROM (
    SELECT md5((a/10)::text) AS a, rank() OVER (ORDER BY md5((a/10)::text)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO text_test_padding SELECT a, repeat(b,10) FROM (
    SELECT md5((a/10)::text) AS a, md5((a/10+1)::text) AS b, rank() OVER (ORDER BY md5((a/10)::text)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO numeric_test SELECT a FROM (
    SELECT ((a/10)::numeric/1000) AS a, rank() OVER (ORDER BY (a/10)::numeric) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO numeric_test_padding SELECT a, repeat(b,10) FROM (
    SELECT ((a/10)::numeric/1000) AS a, md5((a/10)::text) AS b, rank() OVER (ORDER BY (a/10)) AS r FROM data_int
) foo ORDER BY r + (100 * random());
EOF

	vacuum_analyze

}

# low cardinality (1% of table size)

function load_low_cardinality_sorted {

	truncate_tables

	log "loading low cardinality tables / sorted"

	psql $DB > /dev/null  <<EOF
SET work_mem = '$WORK_MEM';

INSERT INTO int_test SELECT a/100 FROM data_int;
INSERT INTO int_test_padding SELECT a/100, repeat(md5(a::text),10) FROM data_int;

INSERT INTO text_test SELECT md5((a/100)::text) FROM data_int ORDER BY md5((a/100)::text);
INSERT INTO text_test_padding SELECT md5((a/100)::text), repeat(md5((a+1)::text),10) FROM data_int ORDER BY md5(a::text);

INSERT INTO numeric_test SELECT hashint4(a/100)::numeric/1000 FROM data_int ORDER BY a;
INSERT INTO numeric_test_padding SELECT hashint4(a/100)::numeric/1000, repeat(md5(a::text),10) FROM data_int ORDER BY a;
EOF

	vacuum_analyze

}

function load_low_cardinality_random {

	truncate_tables

	log "loading low cardinality tables / random"

	psql $DB > /dev/null  <<EOF
SET work_mem = '$WORK_MEM';

-- this needs randomization
INSERT INTO int_test SELECT a/100 FROM data_int ORDER BY random();
INSERT INTO int_test_padding SELECT a/100, md5(a::text) FROM data_int ORDER BY random();

-- these already are random
INSERT INTO text_test SELECT md5((a/100)::text) FROM data_int;
INSERT INTO text_test_padding SELECT md5((a/100)::text), md5((a+1)::text) FROM data_int;

INSERT INTO numeric_test SELECT hashint4(a/100)::numeric/1000 FROM data_int;
INSERT INTO numeric_test_padding SELECT hashint4(a/100)::numeric/1000, md5(a::text) FROM data_int;
EOF

	vacuum_analyze

}

function load_low_cardinality_almost_asc {

	truncate_tables

	log "loading low cardinality tables / almost sorted"

	psql $DB > /dev/null  <<EOF
SET work_mem = '$WORK_MEM';

INSERT INTO int_test SELECT a FROM (
    SELECT (a/100) AS a, rank() OVER (ORDER BY (a/100)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO int_test_padding SELECT a, repeat(b,10) FROM (
    SELECT (a/100) AS a, md5(a::text) AS b, rank() OVER (ORDER BY (a/100)) AS r FROM data_int
) foo ORDER BY r + (100 * random());
 
INSERT INTO text_test SELECT a FROM (
    SELECT md5((a/100)::text) AS a, rank() OVER (ORDER BY md5((a/100)::text)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO text_test_padding SELECT a, repeat(b,10) FROM (
    SELECT md5((a/100)::text) AS a, md5((a/100+1)::text) AS b, rank() OVER (ORDER BY md5((a/100)::text)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO numeric_test SELECT a FROM (
    SELECT ((a/100)::numeric/10000) AS a, rank() OVER (ORDER BY (a/100)::numeric) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO numeric_test_padding SELECT a, repeat(b,10) FROM (
    SELECT ((a/100)::numeric/10000) AS a, md5((a/100)::text) AS b, rank() OVER (ORDER BY (a/100)) AS r FROM data_int
) foo ORDER BY r + (100 * random());
EOF

	vacuum_analyze

}

function run_query {

	times=""
	group=$1
	wmem=$2
	workers=$3
	query=$4

	echo "--" `date` [`date +%s`] >> explains.log
	echo "-- $group rows=$ROWS work_mem=$wmem workers=$workers" >> explains.log

	psql $DB >> explains.log 2>&1 <<EOF
SET trace_sort=on;
SET work_mem='$wmem';
SET max_parallel_workers_per_gather=$workers;
explain $query
EOF

	s=`date +%s`

	for i in `seq 1 5`; do

		/usr/bin/time -f '%e' -o 'query.time' psql $DB > /dev/null <<EOF
\pset pager off
\o /dev/null
SET trace_sort=on;
SET work_mem='$wmem';
SET max_parallel_workers_per_gather=$workers;
COPY ($query) TO '/dev/null'
EOF

		x=`cat query.time`
		times="$times $x"

	done

	e=`date +%s`

	echo $group $ROWS $wmem $workers $s $e "'$query'" $times >> results.csv
}

function run_index {

        times=""
        group=$1
        wmem=$2
        workers=$3
        query=$4

        times=""

	s=`date +%s`

        for i in `seq 1 5`; do

                /usr/bin/time -f '%e' -o 'query.time' psql $DB > /dev/null <<EOF
\pset pager off
\o /dev/null
SET maintenance_work_mem='$wmem';
$query
EOF

                x=`cat query.time`
                times="$times $x"

        done

	e=`date +%s`

        echo $group $ROWS $wmem $workers $s $e "'$query'" $times >> results.csv
}

function run_queries {

	for wm in '1MB' '8MB' '32MB' '128MB' '512MB' '1GB'; do

		log "running queries work_mem=$wm noparallel"

		run_query $1 $wm 0 'SELECT * FROM int_test ORDER BY a'
		run_query $1 $wm 0 'SELECT * FROM int_test_padding ORDER BY a'

		run_query $1 $wm 0 'SELECT * FROM int_test ORDER BY a DESC'
		run_query $1 $wm 0 'SELECT * FROM int_test_padding ORDER BY a DESC'

		run_query $1 $wm 0 'SELECT DISTINCT a FROM int_test ORDER BY a'
		run_query $1 $wm 0 'SELECT DISTINCT a FROM int_test_padding ORDER BY a'

		run_query $1 $wm 0 'SELECT a FROM int_test UNION SELECT a FROM int_test_padding'

		run_index $1 $wm 0 'CREATE INDEX x ON int_test (a); DROP INDEX x'
		run_index $1 $wm 0 'CREATE INDEX x ON int_test_padding (a); DROP INDEX x'

		run_query $1 $wm 0 'SELECT * FROM text_test ORDER BY a'
		run_query $1 $wm 0 'SELECT * FROM text_test_padding ORDER BY a'

		run_query $1 $wm 0 'SELECT * FROM text_test ORDER BY a DESC'
		run_query $1 $wm 0 'SELECT * FROM text_test_padding ORDER BY a DESC'

		run_query $1 $wm 0 'SELECT DISTINCT a FROM text_test ORDER BY a'
		run_query $1 $wm 0 'SELECT DISTINCT a FROM text_test_padding ORDER BY a'

		run_query $1 $wm 0 'SELECT a FROM text_test UNION SELECT a FROM text_test_padding'

		run_index $1 $wm 0 'CREATE INDEX x ON text_test (a); DROP INDEX x'
		run_index $1 $wm 0 'CREATE INDEX x ON text_test_padding (a); DROP INDEX x'

		run_query $1 $wm 0 'SELECT * FROM numeric_test ORDER BY a'
		run_query $1 $wm 0 'SELECT * FROM numeric_test_padding ORDER BY a'

		run_query $1 $wm 0 'SELECT * FROM numeric_test ORDER BY a DESC'
		run_query $1 $wm 0 'SELECT * FROM numeric_test_padding ORDER BY a DESC'

		run_query $1 $wm 0 'SELECT DISTINCT a FROM numeric_test ORDER BY a'
		run_query $1 $wm 0 'SELECT DISTINCT a FROM numeric_test_padding ORDER BY a'

		run_query $1 $wm 0 'SELECT a FROM numeric_test UNION SELECT a FROM numeric_test_padding'

		run_index $1 $wm 0 'CREATE INDEX x ON numeric_test (a); DROP INDEX x'
		run_index $1 $wm 0 'CREATE INDEX x ON numeric_test_padding (a); DROP INDEX x'

		log "running queries work_mem=$wm parallel"

                run_query $1 $wm 4 'SELECT * FROM int_test ORDER BY a'
                run_query $1 $wm 4 'SELECT * FROM int_test_padding ORDER BY a'

                run_query $1 $wm 4 'SELECT * FROM int_test ORDER BY a DESC'
                run_query $1 $wm 4 'SELECT * FROM int_test_padding ORDER BY a DESC'

                run_query $1 $wm 4 'SELECT DISTINCT a FROM int_test ORDER BY a'
                run_query $1 $wm 4 'SELECT DISTINCT a FROM int_test_padding ORDER BY a'

                run_query $1 $wm 4 'SELECT a FROM int_test UNION SELECT a FROM int_test_padding'

                run_index $1 $wm 4 'CREATE INDEX x ON int_test (a); DROP INDEX x'
                run_index $1 $wm 4 'CREATE INDEX x ON int_test_padding (a); DROP INDEX x'

                run_query $1 $wm 4 'SELECT * FROM text_test ORDER BY a'
                run_query $1 $wm 4 'SELECT * FROM text_test_padding ORDER BY a'

                run_query $1 $wm 4 'SELECT * FROM text_test ORDER BY a DESC'
                run_query $1 $wm 4 'SELECT * FROM text_test_padding ORDER BY a DESC'

                run_query $1 $wm 4 'SELECT DISTINCT a FROM text_test ORDER BY a'
                run_query $1 $wm 4 'SELECT DISTINCT a FROM text_test_padding ORDER BY a'

                run_query $1 $wm 4 'SELECT a FROM text_test UNION SELECT a FROM text_test_padding'

                run_index $1 $wm 4 'CREATE INDEX x ON text_test (a); DROP INDEX x'
                run_index $1 $wm 4 'CREATE INDEX x ON text_test_padding (a); DROP INDEX x'

                run_query $1 $wm 4 'SELECT * FROM numeric_test ORDER BY a'
                run_query $1 $wm 4 'SELECT * FROM numeric_test_padding ORDER BY a'

                run_query $1 $wm 4 'SELECT * FROM numeric_test ORDER BY a DESC'
                run_query $1 $wm 4 'SELECT * FROM numeric_test_padding ORDER BY a DESC'

                run_query $1 $wm 4 'SELECT DISTINCT a FROM numeric_test ORDER BY a'
                run_query $1 $wm 4 'SELECT DISTINCT a FROM numeric_test_padding ORDER BY a'

                run_query $1 $wm 4 'SELECT a FROM numeric_test UNION SELECT a FROM numeric_test_padding'

                run_index $1 $wm 4 'CREATE INDEX x ON numeric_test (a); DROP INDEX x'
                run_index $1 $wm 4 'CREATE INDEX x ON numeric_test_padding (a); DROP INDEX x'

	done

}

dropdb $DB
createdb $DB

create_tables

log "sort benchmark $ROWS"

load_unique_sorted

run_queries "unique_sorted"

load_unique_random

run_queries "unique_random"

load_unique_almost_asc

run_queries "unique_almost_asc"

load_low_cardinality_sorted

run_queries "low_cardinality_sorted"

load_low_cardinality_random

run_queries "low_cardinality_random"

load_low_cardinality_almost_asc

run_queries "low_cardinality_almost_asc"

load_high_cardinality_sorted

run_queries "high_cardinality_sorted"

load_high_cardinality_random

run_queries "high_cardinality_random"

load_high_cardinality_almost_asc

run_queries "high_cardinality_almost_asc"
