#!/bin/bash

psql test -c "drop table if exists t" > /dev/null
psql test -c "create table t (a int)" > /dev/null

echo '' > saop-queries.sql
echo '' > saop-inserts.sql

x=0
y=0

echo insert query fraction type rows hashjoin distinct bloom binsearch fpr run duration results

for n in 10 100 1000 10000 100000; do

	for d in 10 100 1000 10000 25000; do

		if [ "$d" -gt "$n" ]; then
			continue
		fi

		psql test -c "truncate t" > /dev/null
		psql test -c "insert into t select mod(i,$d) from generate_series(1,$n) s(i)" > /dev/null

		psql test -c "vacuum analyze t" > /dev/null
		psql test -c "checkpoint" > /dev/null

		x=$((x+1))
		echo $x "insert into t select mod(i,$d) from generate_series(1,$n) s(i)" >> saop-inserts.sql

		# 100% matches
		for fpr in 0.01 0.05 0.1 0.2; do

			for r in `seq 1 5`; do

				v=`psql test -t -A -c "select string_agg(i::text, ', '), string_agg(i::text, '), (') from generate_series(0, $d-1) s(i)"`

				v1=`echo $v | sed 's/|.*//'`
				v2=`echo $v | sed 's/.*|//'`

				y=$((y+1))
				echo $y "SELECT count(*) FROM t WHERE a IN ($v1);" >> saop-queries.sql

				for bloom in on off; do

					for binsearch in on off; do

						if [ "$bloom" == "on" ] && [ "$binsearch" == "on" ]; then
							continue
						fi

						if [ "$bloom" == "off" ] && [ "$fpr" != "0.01" ]; then
							continue
						fi

						psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = $bloom;
set enable_saop_binsearch = $binsearch;
set max_parallel_workers_per_gather = 0;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN ($v1);
EOF

						time=`grep Time tmp.log | awk '{print $2}'`
						rows=`tail -n 2 tmp.log | head -n 1`

						echo $x $y "match-100" int $n $d off $bloom $binsearch $fpr $r $time $rows

					done

				done

				# try running a hashjoin-based version of the query
				if [ "$fpr" == "0.01" ]; then

					y=$((y+1))
					echo $y "SELECT count(*) FROM t WHERE a IN (VALUES ($v2));" >> saop-queries.sql

					psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = off;
set enable_saop_binsearch = off;
set max_parallel_workers_per_gather = 0;
set enable_mergejoin = off;
set enable_nestloop = off;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN (VALUES ($v2));
EOF

					time=`grep Time tmp.log | awk '{print $2}'`
					rows=`tail -n 2 tmp.log | head -n 1`

					echo $x $y "match-100" int $n $d on off off $fpr $r $time $rows

				fi

			done

		done


		# 50% matches
		for fpr in 0.01 0.05 0.1 0.2; do

			for r in `seq 1 5`; do

				v=`psql test -t -A -c "select string_agg(i::text, ', '), string_agg(i::text, '), (') from (select (i-1) as i from generate_series(1, $d * 2) s(i) order by random() limit $d) foo"`

				v1=`echo $v | sed 's/|.*//'`
				v2=`echo $v | sed 's/.*|//'`

				y=$((y+1))
				echo $y "SELECT count(*) FROM t WHERE a IN ($v1);" >> saop-queries.sql

				for bloom in on off; do

					for binsearch in on off; do

						if [ "$bloom" == "on" ] && [ "$binsearch" == "on" ]; then
							continue
						fi

						if [ "$bloom" == "off" ] && [ "$fpr" != "0.01" ]; then
							continue
						fi

						psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = $bloom;
set enable_saop_binsearch = $binsearch;
set max_parallel_workers_per_gather = 0;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN ($v1);
EOF

						time=`grep Time tmp.log | awk '{print $2}'`
						rows=`tail -n 2 tmp.log | head -n 1`

						echo $x $y "match-50" int $n $d off $bloom $binsearch $fpr $r $time $rows

					done

				done

				# try running a hashjoin-based version of the query
				if [ "$fpr" == "0.01" ]; then

					y=$((y+1))
					echo $y "SELECT count(*) FROM t WHERE a IN (VALUES ($v2));" >> saop-queries.sql

					psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = off;
set enable_saop_binsearch = off;
set max_parallel_workers_per_gather = 0;
set enable_mergejoin = off;
set enable_nestloop = off;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN (VALUES ($v2));
EOF

					time=`grep Time tmp.log | awk '{print $2}'`
					rows=`tail -n 2 tmp.log | head -n 1`

					echo $x $y "match-50" int $n $d on off off $fpr $r $time $rows

				fi

			done

		done


		# 10% matches
		for fpr in 0.01 0.05 0.1 0.2; do

			for r in `seq 1 5`; do

				v=`psql test -t -A -c "select string_agg(i::text, ', '), string_agg(i::text, '), (') from (select (i-1) as i from generate_series(1, $d * 10) s(i) order by random() limit $d) foo"`

				v1=`echo $v | sed 's/|.*//'`
				v2=`echo $v | sed 's/.*|//'`

				y=$((y+1))
				echo $y "SELECT count(*) FROM t WHERE a IN ($v1);" >> saop-queries.sql

				for bloom in on off; do

					for binsearch in on off; do

						if [ "$bloom" == "on" ] && [ "$binsearch" == "on" ]; then
							continue
						fi

						if [ "$bloom" == "off" ] && [ "$fpr" != "0.01" ]; then
							continue
						fi

						psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = $bloom;
set enable_saop_binsearch = $binsearch;
set max_parallel_workers_per_gather = 0;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN ($v1);
EOF

						time=`grep Time tmp.log | awk '{print $2}'`
						rows=`tail -n 2 tmp.log | head -n 1`

						echo $x $y "match-10" int $n $d off $bloom $binsearch $fpr $r $time $rows

					done

				done

				# try running a hashjoin-based version of the query
				if [ "$fpr" == "0.01" ]; then

					y=$((y+1))
					echo $y "SELECT count(*) FROM t WHERE a IN (VALUES ($v2));" >> saop-queries.sql

					psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = off;
set enable_saop_binsearch = off;
set max_parallel_workers_per_gather = 0;
set enable_mergejoin = off;
set enable_nestloop = off;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN (VALUES ($v2));
EOF

					time=`grep Time tmp.log | awk '{print $2}'`
					rows=`tail -n 2 tmp.log | head -n 1`

					echo $x $y "match-10" int $n $d on off off $fpr $r $time $rows

				fi

			done

		done


		# 1% matches
		for fpr in 0.01 0.05 0.1 0.2; do

			for r in `seq 1 5`; do

				v=`psql test -t -A -c "select string_agg(i::text, ', '), string_agg(i::text, '), (') from (select (i-1) as i from generate_series(1, $d * 100) s(i) order by random() limit $d) foo"`

				v1=`echo $v | sed 's/|.*//'`
				v2=`echo $v | sed 's/.*|//'`

				y=$((y+1))
				echo $y "SELECT count(*) FROM t WHERE a IN ($v1);" >> saop-queries.sql

				for bloom in on off; do

					for binsearch in on off; do

						if [ "$bloom" == "on" ] && [ "$binsearch" == "on" ]; then
							continue
						fi

						if [ "$bloom" == "off" ] && [ "$fpr" != "0.01" ]; then
							continue
						fi

						psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = $bloom;
set enable_saop_binsearch = $binsearch;
set max_parallel_workers_per_gather = 0;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN ($v1);
EOF

						time=`grep Time tmp.log | awk '{print $2}'`
						rows=`tail -n 2 tmp.log | head -n 1`

						echo $x $y "match-1" int $n $d off $bloom $binsearch $fpr $r $time $rows

					done

				done

				# try running a hashjoin-based version of the query
				if [ "$fpr" == "0.01" ]; then

					y=$((y+1))
					echo $y "SELECT count(*) FROM t WHERE a IN (VALUES ($v2));" >> saop-queries.sql

					psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = off;
set enable_saop_binsearch = off;
set max_parallel_workers_per_gather = 0;
set enable_mergejoin = off;
set enable_nestloop = off;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN (VALUES ($v2));
EOF

					time=`grep Time tmp.log | awk '{print $2}'`
					rows=`tail -n 2 tmp.log | head -n 1`

					echo $x $y "match-1" int $n $d on off off $fpr $r $time $rows

				fi

			done

		done

	done

done


psql test -c "drop table if exists t" > /dev/null
psql test -c "create table t (a text collate \"en_US\")" > /dev/null

for n in 10 100 1000 10000 100000; do

	for d in 10 100 1000 10000 25000; do

		if [ "$d" -gt "$n" ]; then
			continue
		fi

		psql test -c "truncate t" > /dev/null
		psql test -c "insert into t select md5(mod(i,$d)::text) from generate_series(1,$n) s(i)" > /dev/null

		psql test -c "vacuum analyze t" > /dev/null
		psql test -c "checkpoint" > /dev/null

		x=$((x+1))
		echo $x "insert into t select md5(mod(i,$d)::text) from generate_series(1,$n) s(i)" >> saop-inserts.sql

		# 100% matches
		for fpr in 0.01 0.05 0.1 0.2; do

			for r in `seq 1 5`; do

				v=`psql test -t -A -c "select string_agg(md5(i::text), ''', '''), string_agg(md5(i::text), '''), (''') from generate_series(0, $d-1) s(i)"`

				v1=`echo $v | sed 's/|.*//'`
				v2=`echo $v | sed 's/.*|//'`

				y=$((y+1))
				echo $y "SELECT count(*) FROM t WHERE a IN ('$v1');" >> saop-queries.sql

				for bloom in on off; do

					for binsearch in on off; do

						if [ "$bloom" == "on" ] && [ "$binsearch" == "on" ]; then
							continue
						fi

						if [ "$bloom" == "off" ] && [ "$fpr" != "0.01" ]; then
							continue
						fi

						psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = $bloom;
set enable_saop_binsearch = $binsearch;
set max_parallel_workers_per_gather = 0;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN ('$v1');
EOF

						time=`grep Time tmp.log | awk '{print $2}'`
						rows=`tail -n 2 tmp.log | head -n 1`

						echo $x $y "match-100" text $n $d off $bloom $binsearch $fpr $r $time $rows

					done

				done

				# try running a hashjoin-based version of the query
				if [ "$fpr" == "0.01" ]; then

					y=$((y+1))
					echo $y "SELECT count(*) FROM t WHERE a IN (VALUES ('$v2'));" >> saop-queries.sql

					psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = off;
set enable_saop_binsearch = off;
set max_parallel_workers_per_gather = 0;
set enable_mergejoin = off;
set enable_nestloop = off;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN (VALUES ('$v2'));
EOF

					time=`grep Time tmp.log | awk '{print $2}'`
					rows=`tail -n 2 tmp.log | head -n 1`

					echo $x $y "match-100" text $n $d on off off $fpr $r $time $rows

				fi

			done

		done


		# 50% matches
		for fpr in 0.01 0.05 0.1 0.2; do

			for r in `seq 1 5`; do

				v=`psql test -t -A -c "select string_agg(i::text, ''', '''), string_agg(i::text, '''), (''') from (select md5((i-1)::text) as i from generate_series(1, $d * 2) s(i) order by random() limit $d) foo"`

				v1=`echo $v | sed 's/|.*//'`
				v2=`echo $v | sed 's/.*|//'`

				y=$((y+1))
				echo $y "SELECT count(*) FROM t WHERE a IN ('$v1');" >> saop-queries.sql

				for bloom in on off; do

					for binsearch in on off; do

						if [ "$bloom" == "on" ] && [ "$binsearch" == "on" ]; then
							continue
						fi

						if [ "$bloom" == "off" ] && [ "$fpr" != "0.01" ]; then
							continue
						fi

						psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = $bloom;
set enable_saop_binsearch = $binsearch;
set max_parallel_workers_per_gather = 0;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN ('$v1');
EOF

						time=`grep Time tmp.log | awk '{print $2}'`
						rows=`tail -n 2 tmp.log | head -n 1`

						echo $x $y "match-50" text $n $d off $bloom $binsearch $fpr $r $time $rows

					done

				done

				# try running a hashjoin-based version of the query
				if [ "$fpr" == "0.01" ]; then

					y=$((y+1))
					echo $y "SELECT count(*) FROM t WHERE a IN (VALUES ('$v2'));" >> saop-queries.sql

					psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = off;
set enable_saop_binsearch = off;
set max_parallel_workers_per_gather = 0;
set enable_mergejoin = off;
set enable_nestloop = off;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN (VALUES ('$v2'));
EOF

					time=`grep Time tmp.log | awk '{print $2}'`
					rows=`tail -n 2 tmp.log | head -n 1`

					echo $x $y "match-50" text $n $d on off off $fpr $r $time $rows

				fi

			done

		done


		# 10% matches
		for fpr in 0.01 0.05 0.1 0.2; do

			for r in `seq 1 5`; do

				v=`psql test -t -A -c "select string_agg(i::text, ''', '''), string_agg(i::text, '''), (''') from (select md5((i-1)::text) as i from generate_series(1, $d * 10) s(i) order by random() limit $d) foo"`

				v1=`echo $v | sed 's/|.*//'`
				v2=`echo $v | sed 's/.*|//'`

				y=$((y+1))
				echo $y "SELECT count(*) FROM t WHERE a IN ('$v1');" >> saop-queries.sql

				for bloom in on off; do

					for binsearch in on off; do

						if [ "$bloom" == "on" ] && [ "$binsearch" == "on" ]; then
							continue
						fi

						if [ "$bloom" == "off" ] && [ "$fpr" != "0.01" ]; then
							continue
						fi

						psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = $bloom;
set enable_saop_binsearch = $binsearch;
set max_parallel_workers_per_gather = 0;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN ('$v1');
EOF

						time=`grep Time tmp.log | awk '{print $2}'`
						rows=`tail -n 2 tmp.log | head -n 1`

						echo $x $y "match-10" text $n $d off $bloom $binsearch $fpr $r $time $rows

					done

				done

				# try running a hashjoin-based version of the query
				if [ "$fpr" == "0.01" ]; then

					y=$((y+1))
					echo $y "SELECT count(*) FROM t WHERE a IN (VALUES ('$v2'));" >> saop-queries.sql

					psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = off;
set enable_saop_binsearch = off;
set max_parallel_workers_per_gather = 0;
set enable_mergejoin = off;
set enable_nestloop = off;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN (VALUES ('$v2'));
EOF

					time=`grep Time tmp.log | awk '{print $2}'`
					rows=`tail -n 2 tmp.log | head -n 1`

					echo $x $y "match-10" text $n $d on off off $fpr $r $time $rows

				fi

			done

		done


		# 1% matches
		for fpr in 0.01 0.05 0.1 0.2; do

			for r in `seq 1 5`; do

				v=`psql test -t -A -c "select string_agg(i::text, ''', '''), string_agg(i::text, '''), (''') from (select md5((i-1)::text) as i from generate_series(1, $d * 100) s(i) order by random() limit $d) foo"`

				v1=`echo $v | sed 's/|.*//'`
				v2=`echo $v | sed 's/.*|//'`

				y=$((y+1))
				echo $y "SELECT count(*) FROM t WHERE a IN ('$v1');" >> saop-queries.sql

				for bloom in on off; do

					for binsearch in on off; do

						if [ "$bloom" == "on" ] && [ "$binsearch" == "on" ]; then
							continue
						fi

						if [ "$bloom" == "off" ] && [ "$fpr" != "0.01" ]; then
							continue
						fi

						psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = $bloom;
set enable_saop_binsearch = $binsearch;
set max_parallel_workers_per_gather = 0;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN ('$v1');
EOF

						time=`grep Time tmp.log | awk '{print $2}'`
						rows=`tail -n 2 tmp.log | head -n 1`

						echo $x $y "match-1" text $n $d off $bloom $binsearch $fpr $r $time $rows

					done

				done

				# try running a hashjoin-based version of the query
				if [ "$fpr" == "0.01" ]; then

					y=$((y+1))
					echo $y "SELECT count(*) FROM t WHERE a IN (VALUES ('$v2'));" >> saop-queries.sql

					psql -t -A test > tmp.log <<EOF
set enable_saop_threshold = 1;
set enable_saop_bloom = off;
set enable_saop_binsearch = off;
set max_parallel_workers_per_gather = 0;
set enable_mergejoin = off;
set enable_nestloop = off;
set saop_bloom_false_positives = $fpr;

\timing
SELECT count(*) FROM t WHERE a IN (VALUES ('$v2'));
EOF

					time=`grep Time tmp.log | awk '{print $2}'`
					rows=`tail -n 2 tmp.log | head -n 1`

					echo $x $y "match-1" text $n $d on off off $fpr $r $time $rows

				fi

			done

		done

	done

done

