#!/usr/bin/bash

# Restart stress test for a PostgreSQL primary/standby pair.
#
# This first part wipes any previous clusters, initializes a primary with
# WAL archiving into ./wal, starts it, and clones a standby (port 5433)
# from it. The marker files primary.running / standby.running record
# whether the script considers each server to be up.

DATADIR_PRIMARY=data-primary
DATADIR_STANDBY=data-standby
# pg_ctl reads PGCTLTIMEOUT from its environment; without the export the
# value would stay a plain shell variable and never reach pg_ctl
export PGCTLTIMEOUT=3600
WALDIR=$(pwd)/wal

# start from scratch: kill any running server and remove both clusters
killall -9 postgres
rm -Rf "$DATADIR_PRIMARY" "$DATADIR_STANDBY" primary.running standby.running

# -p so that a re-run does not fail on directories left by a previous run
mkdir -p control "$WALDIR"

# init primary (nothing below can work without it, so give up on failure)
pg_ctl -D "$DATADIR_PRIMARY" init || exit 1

# add "log_min_duration_statement = 0" below to log every statement
cat >> "$DATADIR_PRIMARY/postgresql.conf" <<EOF
wal_level = logical
log_line_prefix = '%n %m [%p] [%b:%a] [%c:%l] [%s] [%v/%x] '
checkpoint_timeout = '1min'
archive_mode = 'on'
archive_command = 'cp %p "$WALDIR/.tmp" && mv "$WALDIR/.tmp" "$WALDIR/%f"'
EOF

pg_ctl -D "$DATADIR_PRIMARY" -l pg-primary.log start || exit 1
touch primary.running

# init a replica: base backup with a fast checkpoint (-c fast), recovery
# configuration written (-R) and a replication slot "replica" created (-C -S)
pg_basebackup -D "$DATADIR_STANDBY" -c fast -R -C -S replica || exit 1
echo "port = 5433" >> "$DATADIR_STANDBY/postgresql.conf"
pg_ctl -D "$DATADIR_STANDBY" -l pg-standby.log start || exit 1
touch standby.running

# Start the primary unless the primary.running marker file exists.
#
# Arguments: $1 - loop number, used only in log messages
# Globals:   DATADIR_PRIMARY (read)
# Files:     writes control/primary.<epoch> before every start attempt and
#            creates primary.running once the server is up
#
# "pg_ctl start" is retried once per second until it succeeds.
function primary_start()
{
	local r=$1
	local ts

	# marker present => the primary was not stopped, nothing to do
	if [[ -f primary.running ]]; then
		return
	fi

	echo "$(date)" "loop $r start primary"

	while true; do
		ts=$(date +%s)
		echo "$(date)" "loop $r start primary: $ts"

		# keep a pg_controldata snapshot taken right before the start attempt
		pg_controldata "$DATADIR_PRIMARY" > "control/primary.$ts"

		if pg_ctl -D "$DATADIR_PRIMARY" -l pg-primary.log start; then
			break
		fi

		echo "$(date)" "loop $r retry start primary"
		sleep 1
	done

	# record the new state only after the server really started
	touch primary.running

	echo "$(date)" "loop $r primary started"
}

# Randomly stop the primary: immediate shutdown, fast shutdown, or leave
# it running (one chance in three each).
#
# Arguments: $1 - loop number (accepted for symmetry with the other
#                 helpers, not used)
# Globals:   DATADIR_PRIMARY (read)
# Files:     when the server is stopped, writes control/primary.<epoch>
#            and removes the primary.running marker
function primary_stop()
{
	local mode lsn ts

	case $((RANDOM % 3)) in
		0) mode=immediate ;;
		1) mode=fast ;;
		*)
			echo "$(date)" "not stopping primary"
			return
			;;
	esac

	# log the current WAL position before the shutdown
	lsn=$(psql postgres -t -A -c "select pg_current_wal_lsn()")
	ts=$(date +%s)
	echo "$(date)" "stopping primary / $mode $ts / $lsn"

	pg_ctl -D "$DATADIR_PRIMARY" -m "$mode" stop

	# keep a pg_controldata snapshot of the stopped cluster
	pg_controldata "$DATADIR_PRIMARY" > "control/primary.$ts"

	# marker gone => primary_start will bring the server back
	rm -f primary.running
}

# Restart the primary, shutting it down in a randomly chosen mode
# (immediate or fast, one chance in two each).
#
# Arguments: $1 - loop number (not used)
#            $2 - checksum state flag passed by the caller (not used)
# Globals:   DATADIR_PRIMARY (read)
#
# NOTE(review): despite the name, nothing in this function runs a checksum
# verification tool; it only stops and starts the server. Confirm whether a
# verification step is missing between the two pg_ctl calls.
function primary_verify_checksums()
{
	local mode

	if (( RANDOM % 2 == 0 )); then
		mode=immediate
	else
		mode=fast
	fi

	echo "$(date)" "stopping primary / $mode"
	pg_ctl -D "$DATADIR_PRIMARY" -m "$mode" stop

	echo "$(date)" "starting primary"
	pg_ctl -D "$DATADIR_PRIMARY" -l pg-primary.log start
}

# Launch a pgbench run against the primary in the background.
#
# Arguments: $1 - loop number (not used)
#            $2 - database to run against
# Files:     pgbench output is appended to pgbench-primary.log
#
# The number of clients is random (1-4) and, with one chance in two, -C is
# added so that pgbench opens a new connection for every transaction.
# The function returns immediately; pgbench itself runs for up to 3600 s.
function primary_pgbench()
{
	local db=$2
	local clients=$((RANDOM % 4 + 1))
	local label="primary"
	local -a opts=(-c "$clients" -P 1 -T 3600)

	if (( RANDOM % 2 != 0 )); then
		label="primary (-C)"
		opts=(-C "${opts[@]}")
	fi

	echo "$(date)" "starting pgbench on $label, $clients clients"
	pgbench "${opts[@]}" "$db" >> pgbench-primary.log 2>&1 &
}

# Start the standby unless the standby.running marker file exists.
#
# Arguments: $1 - loop number, used only in log messages
# Globals:   DATADIR_STANDBY (read)
# Files:     writes control/standby.<epoch> before every start attempt and
#            creates standby.running once the server is up
#
# "pg_ctl start" is retried once per second until it succeeds.
function standby_start()
{
	local r=$1
	local ts

	# marker present => the standby was not stopped, nothing to do
	if [[ -f standby.running ]]; then
		return
	fi

	echo "$(date)" "loop $r start standby"

	while true; do
		ts=$(date +%s)
		echo "$(date)" "loop $r start standby: $ts"

		# keep a pg_controldata snapshot taken right before the start attempt
		pg_controldata "$DATADIR_STANDBY" > "control/standby.$ts"

		if pg_ctl -D "$DATADIR_STANDBY" -l pg-standby.log start; then
			break
		fi

		echo "$(date)" "loop $r retry start standby"
		sleep 1
	done

	# record the new state only after the server really started
	touch standby.running

	echo "$(date)" "loop $r standby started"
}

# Randomly stop the standby: immediate shutdown, fast shutdown, or leave
# it running (one chance in three each).
#
# Arguments: $1 - loop number (accepted for symmetry with the other
#                 helpers, not used)
# Globals:   DATADIR_STANDBY (read)
# Files:     when the server is stopped, writes control/standby.<epoch>
#            and removes the standby.running marker
function standby_stop()
{
	local mode lsn ts

	case $((RANDOM % 3)) in
		0) mode=immediate ;;
		1) mode=fast ;;
		*)
			echo "$(date)" "not stopping standby"
			return
			;;
	esac

	# pg_current_wal_lsn() is rejected while a server is in recovery, so on
	# the standby the replay position has to be used for the log message
	lsn=$(psql postgres -t -p 5433 -A -c "select pg_last_wal_replay_lsn()")
	ts=$(date +%s)
	echo "$(date)" "stopping standby / $mode $ts / $lsn"

	pg_ctl -D "$DATADIR_STANDBY" -m "$mode" stop

	# keep a pg_controldata snapshot of the stopped cluster
	pg_controldata "$DATADIR_STANDBY" > "control/standby.$ts"

	# marker gone => standby_start will bring the server back
	rm -f standby.running
}

# Restart the standby, shutting it down in a randomly chosen mode
# (immediate or fast, one chance in two each).
#
# Arguments: $1 - loop number (not used)
#            $2 - checksum state flag passed by the caller (not used)
# Globals:   DATADIR_STANDBY (read)
#
# NOTE(review): despite the name, nothing in this function runs a checksum
# verification tool; it only stops and starts the server. Confirm whether a
# verification step is missing between the two pg_ctl calls.
function standby_verify_checksums()
{
	local mode

	if (( RANDOM % 2 == 0 )); then
		mode=immediate
	else
		mode=fast
	fi

	echo "$(date)" "stopping standby / $mode"
	pg_ctl -D "$DATADIR_STANDBY" -m "$mode" stop

	echo "$(date)" "starting standby"
	pg_ctl -D "$DATADIR_STANDBY" -l pg-standby.log start
}

# Block until the standby has replayed to within 16384 bytes of the
# primary's current WAL position.
#
# Arguments: $1 - loop number, used only in log messages
#
# The lag is read from pg_stat_replication on the primary and polled once
# per second. While the view has no row, or replay_lsn is not known yet,
# the comparison is empty and the loop keeps waiting. Only the first row
# of the view is considered.
function standby_catch_up()
{
	local r=$1
	local row lag caught_up

	while true; do

		# a single query returns both the lag and the verdict, so the value
		# that gets logged is the one the decision was based on
		row=$(psql -t -A -F '|' postgres -c "select (pg_current_wal_lsn() - replay_lsn), (pg_current_wal_lsn() - replay_lsn) < 16384 from pg_stat_replication")
		IFS='|' read -r lag caught_up <<< "$row"

		if [[ "$caught_up" == "t" ]]; then
			break
		fi

		echo "$(date)" "loop $r waiting for standby to catch up ($lag bytes)"
		sleep 1

	done
}

# Launch a read-only pgbench run against the standby (port 5433) in the
# background.
#
# Arguments: $1 - loop number (not used)
#            $2 - database to run against
# Files:     pgbench output is appended to pgbench-standby.log
#
# pgbench runs the select-only script (-S) without vacuuming first (-n).
# The number of clients is random (1-4) and, with one chance in two, -C is
# added so that pgbench opens a new connection for every transaction.
# The function returns immediately; pgbench itself runs for up to 3600 s.
function standby_pgbench()
{
	local db=$2
	local clients=$((RANDOM % 4 + 1))
	local label="standby"
	local -a opts=(-n -S -p 5433 -c "$clients" -P 1 -T 3600)

	if (( RANDOM % 2 != 0 )); then
		label="standby (-C)"
		opts=(-C "${opts[@]}")
	fi

	echo "$(date)" "starting pgbench on $label, $clients clients"
	pgbench "${opts[@]}" "$db" >> pgbench-standby.log 2>&1 &
}

# Sleep for a random whole number of seconds between 1 and the given
# upper bound.
#
# Arguments: $1 - loop number, used only in the log message
#            $2 - upper bound in seconds; values below 1 (or an empty
#                 value) are treated as 1 so the modulo never divides by 0
function random_sleep()
{
	local r=$1
	local max=$2
	local secs

	if (( max < 1 )); then
		max=1
	fi

	secs=$((RANDOM % max + 1))
	echo "$(date)" "loop $r sleeping for $secs seconds"
	sleep "$secs"
}

# small test
# upper bound (seconds) for every random_sleep call below
sleep=10
# NOTE(review): m is not read by any top-level code in this script
m=fast
# default is 'on', so start with disable
s="disable"

# 200 loops of primary/standby restarts
for ((r = 1; r <= 200; r++)); do

	# pick one of ten database names and a random scale (10-49)
	x=$((RANDOM % 10))
	db="test_$x"

	scale=$((RANDOM % 40 + 10))

	# (re)create the database and initialize it with pgbench data
	dropdb --if-exists "$db"
	createdb "$db"
	pgbench -i -s "$scale" "$db" >> pgbench-init.log 2>&1

	echo "$(date)" "loop $r pgbench scale $scale"

	# run pgbench on primary/standby in the background
	primary_pgbench "$r" "$db"
	standby_pgbench "$r" "$db"

	# sleep for a bit (two independent random intervals)
	random_sleep "$r" "$sleep"
	random_sleep "$r" "$sleep"

	# stop the primary/standby in some way
	primary_stop "$r"
	standby_stop "$r"

	# start the primary/standby again
	primary_start "$r"
	standby_start "$r"

	# run pgbench on primary/standby in the background
	primary_pgbench "$r" "$db"
	standby_pgbench "$r" "$db"

	# sleep for a bit
	random_sleep "$r" "$sleep"

	# stop primary/standby, verify checksums and start again
	echo "$(date)" "loop $r verify checksums on primary"
	primary_verify_checksums "$r" "$s"

	echo "$(date)" "loop $r verify checksums on standby"
	standby_verify_checksums "$r" "$s"

	# wait for standby to catch up
	echo "$(date)" "loop $r wait for standby to catch up"
	standby_catch_up "$r"

	# flip the state for the next loop
	if [[ "$s" == "disable" ]]; then
		s="enable"
	else
		s="disable"
	fi

done
