#!/bin/sh

# version 1.3 (05.Mar.2009)
# * hanging() checks the total CPU usage of multiple bacula-dir processes and kills all of them (if the total is > 80%), except the one using the least CPU (which is presumed to be the master)

# version 1.2.2 (26.Feb.2009)
# * compatibility with linux - relying on `uname`.

# version 1.2.1 (30.Jan.2009)
# * bugfix: hanging() - check for CPU usage 100+% too
# * bugfix: hanging() - fix parsing PID from `top`

# version 1.2 (11.Nov.2008)
# * more precise match of still hanging processes
# * function hanging() for checking hanging jobs
# * report if no hanging jobs are found

# version 1.1 (20.Oct.2008)
# * add check for hanging bacula-dir processes

# Command-line arguments: the name and the id of the job this script runs for.
jobName="$1"
jobId="$2"

# The batch-mode invocation of top and the location of bconsole differ
# between platforms, so pick them according to what uname reports.
arch="$(uname)"

case "$arch" in
	*Linux*)
		bconsole=/usr/sbin/bconsole
		top="top -b -n1"
		;;
	*BSD*)
		bconsole=/usr/local/sbin/bconsole
		top="top -b"
		;;
	*)
		# anything else has to be added by hand before the script can be used
		echo "Unknown architecture! Please define this and commands for 'top' and 'bconsole' in the script!"
		exit 1
		;;
esac

# hanging - print the PIDs of bacula-dir processes that appear to be hung.
#
# Takes one snapshot with the command line stored in $top (expanded unquoted
# on purpose, so that e.g. "top -b -n1" splits into command and options),
# keeps the lines mentioning bacula-dir and sums the integer part of the first
# "NN.NN%" figure of every such line except the last one. The last line is
# left out because it is expected to be the process using the least CPU,
# which the script assumes is not hanging.
# NOTE(review): this relies on top listing processes by descending CPU usage
# and printing the usage with a trailing '%' - confirm for the platforms in use.
#
# Globals read:    top
# Globals written: procs, procs_n, i, line, cpu, pid, cpu_sum, procs_id
# Output: the candidate PIDs on one line, separated by single spaces, when
#         their summed CPU usage is above 80%; nothing otherwise.
hanging () {
	# reset the accumulators so a repeated call in the same shell starts from zero
	cpu_sum=0
	procs_id=""
	procs="$($top | grep bacula-dir)"
	# the arithmetic expansion drops the padding some wc implementations print
	procs_n=$(( $(echo "$procs" | wc -l) ))
	# let's leave the least CPU taking process out - this doesn't hang
	i=1
	while [ "$i" -lt "$procs_n" ]; do
		line="$(echo "$procs" | sed -n "${i}p")"
		# integer part of the CPU percentage; matching a run of digits also
		# copes with three-digit values such as 100.00%
		cpu="$(echo "$line" | grep --only-matching '[0-9]\+\.[0-9]\+%' | head -n 1 | sed 's/\..*$//')"
		# a line without a percentage figure counts as 0 instead of breaking the arithmetic
		cpu_sum=$((cpu_sum + ${cpu:-0}))
		pid="$(echo "$line" | grep --only-matching '^[ ]*[0-9]\+')"
		procs_id="$procs_id $pid"
		i=$((i+1))
	done
	# only a total above 80% is treated as a hang
	if [ "$cpu_sum" -gt 80 ]; then
		# processes taken into account will be killed!
		# unquoted on purpose: word splitting collapses the padding around the PIDs
		echo $procs_id
	fi
}

# ===== check hanging jobs =====
# bacula-dir occasionally gets stuck using 90%+ of CPU; such processes are killed here
hang="$(hanging)"
if [ -n "$hang" ]; then
	# take a second sample after a short pause: only PIDs reported by both
	# samples are treated as really hanging
	sleep 2
	for p in $(hanging); do
		if echo "$hang" | grep "\<$p\>" > /dev/null; then
			echo "Process $p seems to be hanging.. killing it.."
			kill -9 "$p"
		fi
	done
else
	echo "No hanging bacula-dir processes found! :)"
fi

# ===== check duplicate jobs =====
# Feed "status dir" to bconsole, drop the lines starting with our own job id
# and keep whatever still shows a job of the same name running or waiting.
found="$(
	echo "status dir" \
		| $bconsole \
		| grep -v "^ *${jobId}" \
		| grep --only-matching " ${jobName}\.[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}.*is \(running\|waiting\)"
)"

# nothing left over means no other instance of this job is active
if [ -z "$found" ]; then
	echo "No duplicate jobs found! :)"
	exit 0
fi

echo "Duplicate job found:$found!"
exit 1
