Dear John, On Thu, Sep 25, 2014 at 10:03:27AM -0400, John Lauro wrote: > One of the reasons I like ksh is that true, echo, and sleep (among > many others) are all builtin, so you don't need those commands on the > filesystem, so the script is less likely to fail if the filesystem > fails... that said you probably don't have ksh installed by default.
Thanks for the hint! I just wrote a simple watchdog resource agent and the corresponding shell script, which successfully reboots a server when the disk fails. I provided my solution in the attachment. Put crude-watchdog.sh in /root/, and put crude-watchdog in /usr/lib/ocf/resource.d/heartbeat/. In my two-node cluster I used these commands to let this watchdog run on both machines: pcs resource create WATCHDOG ocf:heartbeat:crude-watchdog && pcs resource clone WATCHDOG Best regards, Carsten -- andrena objects ag Büro Frankfurt Clemensstr. 8 60487 Frankfurt Tel: +49 (0) 69 977 860 38 Fax: +49 (0) 69 977 860 39 http://www.andrena.de Vorstand: Hagen Buchwald, Matthias Grund, Dr. Dieter Kuhn Aufsichtsratsvorsitzender: Rolf Hetzelberger Sitz der Gesellschaft: Karlsruhe Amtsgericht Mannheim, HRB 109694 USt-IdNr. DE174314824 Bitte beachten Sie auch unsere anstehenden Veranstaltungen: http://www.andrena.de/events
crude-watchdog.sh
Description: Bourne shell script
#!/bin/sh
#
# crude-watchdog: OCF resource agent that starts, stops and monitors the
# watchdog script referenced by SCRIPT.

# Locate and load the OCF shell function library. A preset
# OCF_FUNCTIONS_DIR is honoured; otherwise it is derived from OCF_ROOT.
# The expansions are quoted so a directory containing whitespace or glob
# characters is passed through unchanged.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# Path of the watchdog script supervised by this agent.
SCRIPT=/root/crude-watchdog.sh
# Write this agent's OCF metadata (an XML document describing the agent
# and the actions it supports) to stdout.
meta_data() {
  # The delimiter is quoted because the XML is literal text; nothing in it
  # is meant to be expanded by the shell.
  cat <<'OCF_METADATA_XML'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="crude-watchdog" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This agent reboots the system if the root file system stops working.
</longdesc>
<shortdesc lang="en">
This agent reboots the system if the root file system stops working.
</shortdesc>
<parameters>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0" />
<action name="reload" timeout="20" />
<action name="migrate_to" timeout="20" />
<action name="migrate_from" timeout="20" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
OCF_METADATA_XML
}
#######################################################################
# Print a usage summary to stdout.
# The action list includes "reload" so that it agrees with the actions the
# dispatcher at the bottom of this script accepts. $0 is expanded on
# purpose so the message shows how the agent was invoked.
watchdog_usage() {
  cat <<END
usage: $0 {start|stop|monitor|reload|migrate_to|migrate_from|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
#######################################
# Start the watchdog script in the background unless it already runs.
# Globals:
#   SCRIPT (read), OCF_SUCCESS, OCF_ERR_INSTALLED (read)
# Returns:
#   OCF_SUCCESS        if the watchdog was already running or was launched
#   OCF_ERR_INSTALLED  if SCRIPT is missing or not executable
#######################################
watchdog_start() {
  # Starting an already running resource must succeed without side effects.
  watchdog_monitor
  if [ "$?" -eq "$OCF_SUCCESS" ]; then
    return "$OCF_SUCCESS"
  fi

  # Refuse to report a successful start when there is nothing to run.
  if [ ! -x "$SCRIPT" ]; then
    ocf_log err "Watchdog script ${SCRIPT} is missing or not executable."
    return "$OCF_ERR_INSTALLED"
  fi

  # Detach stdin/stdout/stderr so the long-running child does not keep the
  # agent's own file descriptors open after the agent has exited.
  nohup "$SCRIPT" >/dev/null 2>&1 </dev/null &
  return "$OCF_SUCCESS"
}
#######################################
# Stop the watchdog script and wait until it is really gone.
# The process is first asked to terminate (killall's default signal) and
# polled once per second for up to 10 seconds; if it is still alive it is
# sent SIGKILL and polled for up to 3 more seconds. The total stays below
# the 20 second stop timeout advertised in this agent's metadata.
# Globals:
#   OCF_SUCCESS, OCF_ERR_GENERIC (read)
# Returns:
#   OCF_SUCCESS      if the watchdog is not running (any more)
#   OCF_ERR_GENERIC  if it is still running after SIGKILL
#######################################
watchdog_stop() {
  # Stopping a resource that is not running counts as success.
  watchdog_monitor
  if [ "$?" -ne "$OCF_SUCCESS" ]; then
    return "$OCF_SUCCESS"
  fi

  killall crude-watchdog.sh

  # A process does not vanish the instant it is signalled; checking only
  # once right after killall would report a spurious stop failure.
  for _wd_attempt in 1 2 3 4 5 6 7 8 9 10; do
    watchdog_monitor
    if [ "$?" -ne "$OCF_SUCCESS" ]; then
      return "$OCF_SUCCESS"
    fi
    sleep 1
  done

  ocf_log warn "crude-watchdog.sh did not terminate; sending SIGKILL."
  killall -KILL crude-watchdog.sh

  for _wd_attempt in 1 2 3; do
    sleep 1
    watchdog_monitor
    if [ "$?" -ne "$OCF_SUCCESS" ]; then
      return "$OCF_SUCCESS"
    fi
  done

  return "$OCF_ERR_GENERIC"
}
#######################################
# Report whether a crude-watchdog.sh process is currently running.
# pgrep -f matches the pattern against each process's full command line
# and never reports itself, so no "grep -v grep" filtering is needed; the
# dot is escaped so it only matches a literal ".".
# Globals:
#   OCF_SUCCESS, OCF_NOT_RUNNING (read)
# Returns:
#   OCF_SUCCESS      if at least one matching process exists
#   OCF_NOT_RUNNING  otherwise
#######################################
watchdog_monitor() {
  if pgrep -f 'crude-watchdog\.sh' >/dev/null; then
    return "$OCF_SUCCESS"
  fi
  return "$OCF_NOT_RUNNING"
}
# Check that the watchdog script referenced by SCRIPT exists and is
# executable. Returns OCF_SUCCESS if so, OCF_ERR_ARGS otherwise.
watchdog_validate() {
  [ -x "$SCRIPT" ] || return $OCF_ERR_ARGS
  return $OCF_SUCCESS
}
# Dispatch on the requested action.
# NOTE(review): __OCF_ACTION and ocf_log are not defined in this file;
# presumably both come from the sourced ocf-shellfuncs library - confirm.
#
# meta-data, usage/help and unknown actions exit directly; every other
# action falls through to the common logging/exit code after the case.
case "$__OCF_ACTION" in
  meta-data)
    meta_data
    exit "$OCF_SUCCESS"
    ;;
  start)
    watchdog_start
    ;;
  stop)
    watchdog_stop
    ;;
  monitor)
    watchdog_monitor
    ;;
  migrate_to)
    # Migrating away from this node amounts to stopping the watchdog here.
    # The message is kept on one line so it forms a single log entry.
    ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
    watchdog_stop
    ;;
  migrate_from)
    # Migrating to this node amounts to starting the watchdog here.
    ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}."
    watchdog_start
    ;;
  reload)
    # Nothing to reload; the action is only logged.
    ocf_log info "Reloading ${OCF_RESOURCE_INSTANCE} ..."
    ;;
  validate-all)
    watchdog_validate
    ;;
  usage|help)
    watchdog_usage
    exit "$OCF_SUCCESS"
    ;;
  *)
    watchdog_usage
    exit "$OCF_ERR_UNIMPLEMENTED"
    ;;
esac

# Exit status of the action handler that ran in the case statement above.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit "$rc"
signature.asc
Description: Digital signature
_______________________________________________ Pacemaker mailing list: [email protected] http://oss.clusterlabs.org/mailman/listinfo/pacemaker Project Home: http://www.clusterlabs.org Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf Bugs: http://bugs.clusterlabs.org
