#!/bin/bash

set -e

dropdb --if-exists test
createdb test

psql test -c 'create extension pageinspect'


function check_brin_ranges {

	opclass=$1
	pages_per_range=$2

	# first check the index has the right number of brin ranges, for the table pages
	table_pages=$(psql test -t -A -c "select pg_relation_size('brin_test') / 8192")

	index_pages=$(psql test -t -A -c "select pg_relation_size('brin_test_a_idx') / 8192")

	rm -f ranges.log blknums.log

	for p in `seq 0 $((index_pages - 1))`; do
		psql test -t -A -c "SELECT * FROM brin_page_items(get_raw_page('brin_test_a_idx', $p), 'brin_test_a_idx')" >> ranges.log 2> /dev/null || true
		psql test -t -A -c "SELECT blknum FROM brin_page_items(get_raw_page('brin_test_a_idx', $p), 'brin_test_a_idx')" >> blknums.log 2> /dev/null || true
	done

	distinct_ranges=$(grep -v '^$' blknums.log | wc -l)

	index_ranges=$(grep -v '^$' ranges.log | wc -l)

	expected_ranges=$(((table_pages + pages_per_range - 1) / pages_per_range))

	echo "table pages $table_pages range $pages_per_range"
	echo "ranges expected $expected_ranges index $index_ranges distinct $distinct_ranges"

	if [[ ! $index_ranges -eq $expected_ranges ]]; then
		echo "ERROR: number of expected and built ranges do not match"
		exit 1
	fi

	if [[ ! $index_ranges -eq $distinct_ranges ]]; then
		echo "ERROR: duplicate index ranges ($index_ranges != $distinct_ranges)"
		exit 1
	fi

	# now check that the ranges are valid with respect to data
	min=$(psql test -t -A -c "SELECT MIN(a) FROM brin_test")
	max=$(psql test -t -A -c "SELECT MAX(a) FROM brin_test")

	echo "min $min max $max"

	while IFS= read -r line
	do
		IFS='|' read -r -a array <<< "$line"
		itemoffset=${array[0]}
		blknum=${array[1]}
		attnum=${array[2]}
		allnulls=${array[3]}
		hasnulls=${array[4]}
		placeholder=${array[5]}
		empty=${array[6]}
		value=${array[7]}

		if [[ "$empty" == 't' && "$allnulls" != 't' ]]; then
			echo "ERROR: invalid empty/allnulls combination ($line)" 
			exit 1
		fi

		# if not empty, parse the value and check min/max, but only for minmax
		if [ "$empty" != 't' ] && [ "$opclass" == "minmax" ]; then

			str=$(echo $value | sed 's/[{}]//g')

			minval=$(echo $str | awk '{print $1}')
			maxval=$(echo $str | awk '{print $3}')

			if [[ $minval -lt $min ]]; then
				echo "ERROR: invalid min value ($minval < $min) ($line)" 
				exit 1
			fi

			if [[ $maxval -gt $max ]]; then
				echo "ERROR: invalid max value ($maxval > $max) ($line)" 
				exit 1
			fi

		fi

	done < ranges.log
}


function check_brin_values {

	where=$1

	# check just 1% of values
	sample=0.005

	psql -t -A test > values.log <<EOF
SET enable_seqscan = on;
SET enable_bitmapscan = off;
SELECT * FROM (SELECT a, COUNT(*) FROM brin_test $where GROUP BY a ORDER BY a) foo WHERE random() < $sample;
EOF

	tail -n +3 values.log > values2.log

	while IFS= read -r line; do

		IFS='|' read -r -a array <<< "$line"

		value="${array[0]}"
		seqcount="${array[1]}"

		psql -t -A test >> explains.log 2>&1 <<EOF
SET enable_seqscan = off;
SET enable_bitmapscan = on;
EXPLAIN SELECT COUNT(*) FROM brin_test WHERE a = $value;
EOF

		psql -t -A test > tmp.log 2>&1 <<EOF
SET enable_seqscan = off;
SET enable_bitmapscan = on;
SELECT COUNT(*) FROM brin_test WHERE a = $value;
EOF

		bitmapcount=`cat tmp.log | tail -n +3`

		echo $value $seqcount $bitmapcount

		if [ "$seqcount" != "$bitmapcount" ]; then
			exit 1
		fi

	done < values2.log

}

while /bin/true; do

	# 1-10M rows
	nrows=$((1000000 * (1 + RANDOM % 10)))

	# fillfactor 10-100
	fillfactor=$((10 + (RANDOM % 90)))

	# page range 1-128
	pagerange=$((1 + (RANDOM % 128)))

	# max allowed parallel workers 0-8
	parallel_workers=$((RANDOM % 9))

	# max allowed maintenance workers 0-8
	maintenance_workers=$((RANDOM % 9))

	# distinct values 1000 - nrows/10
	ndistinct=$((1000 + ((nrows / 10 - 1000) * RANDOM / 32767)))

	# variability of values 0%-10% (overlaps)
	variability=$((RANDOM % 11))

	# delete 0-5%
	delete=$((RANDOM % 5))

	# filter to 0-5% by index predicate
	filter=$((RANDOM % 5))

	# index opclass
	opclass="minmax"
	r=$((RANDOM % 2))

	# but 50% of the time use bloom
	if [ "$r" == "0" ]; then
		opclass="bloom"
	fi

	echo rows $nrows fillfactor $fillfactor pagerange $pagerange opclass $opclass parallel_workers $parallel_workers maintenance_workers $maintenance_workers ndistinct $ndistinct variability $variability delete $delete filter $filter

	psql test -c "drop table if exists brin_test"
	psql test -c "create table brin_test (a bigint) with (fillfactor=$fillfactor)"
	psql test -c "insert into brin_test select (($ndistinct * (i::float / $nrows) + random() * ($ndistinct * $variability / 100.0))::bigint) from generate_series(1,$nrows) s(i)"

	echo "insert into brin_test select (($ndistinct * (i::float / $nrows) + random() * ($ndistinct * $variability / 100.0))::bigint) from generate_series(1,$nrows) s(i)"

	# optionally delete a chunk of data
	if [ "$delete" != "0" ]; then
		x=$((RANDOM % ndistinct))
		y=$((x + delete * ndistinct / 100))
		psql test -c "delete from brin_test where a between $x and $y"
	fi

	where=""
	if [ "$filter" != "0" ]; then
		x=$((RANDOM % ndistinct))
		y=$((x + filter * ndistinct / 100))
		where="WHERE (a BETWEEN $x AND $y)"
	fi

	psql test -c "vacuum analyze brin_test"

	if [ "$opclass" == "minmax" ]; then
		ddl="CREATE INDEX ON brin_test USING brin (a) WITH (pages_per_range=$pagerange) $where"
	else
		# some hardcoded values to keep the filter small enough
		ddl="CREATE INDEX ON brin_test USING brin (a int8_bloom_ops(n_distinct_per_range=5000, false_positive_rate=0.05)) WITH (pages_per_range=$pagerange) $where"
	fi

	psql test > tmp.log <<EOF
SET maintenance_work_mem = '1GB';
SET max_parallel_workers = $parallel_workers;
SET max_parallel_maintenance_workers = $maintenance_workers;
SELECT extract(epoch from now()), 'start';
$ddl;
SELECT extract(epoch from now()), 'end';
EOF

	start=`grep start tmp.log | awk '{print $1}'`
	end=`grep end tmp.log | awk '{print $1}'`
	echo "1000 * ($end - $start)" | bc >> timings.log

	check_brin_ranges $opclass $pagerange

	check_brin_values "$where"

done
