Hi Andrew,
I found another behavior that is caused by the cluster forgetting
that a resource is supposed to stay stopped.
For example, consider a node that has both a primitive resource and a
master/slave resource.
Both have on-fail set to "standby".
When the master/slave resource fails, all resources on the failed
node are stopped, and the master/slave resource's fail-count is
increased.
But then only the primitive resource restarts on the failed node,
because its fail-count has not been increased and the cluster forgets
that the resource is supposed to stay stopped...
When a failover (F/O) occurs,
in the case of a resource that is _not_ master/slave,
pengine creates a single graph to stop and restart the resource.
In the case of a master/slave resource, it creates two graphs:
one for the resource's stop process and another for the restart
process.
And when it creates the graph for the restart process,
nothing remembers that the resources are supposed to stay stopped on
the failed node.
This behavior is the same as (or similar to) what you were worried
about, isn't it?
To avoid this behavior, the status of the node needs to be updated
before the restart process.
As a trial, I created a patch (against pacemaker-dev 366b14d79780),
and I have attached the graph produced with the patched pacemaker.
It's not a "general" solution, just for reference...
Regards,
Satomi TANIGUCHI
diff -urN pacemaker-dev/crmd/te_actions.c pacemaker-dev.mod/crmd/
te_actions.c
--- pacemaker-dev/crmd/te_actions.c 2008-11-26 10:47:46.000000000
+0900
+++ pacemaker-dev.mod/crmd/te_actions.c 2008-11-26
10:48:47.000000000 +0900
@@ -175,6 +175,42 @@
return TRUE;
}
+static gboolean
+te_standby_node(crm_graph_t *graph, crm_action_t *action)
+{
+ const char *id = NULL;
+ const char *uuid = NULL;
+ const char *target = NULL;
+
+ id = ID(action->xml);
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+
+ CRM_CHECK(id != NULL,
+ crm_log_xml_warn(action->xml, "BadAction");
+ return FALSE);
+ CRM_CHECK(uuid != NULL,
+ crm_log_xml_warn(action->xml, "BadAction");
+ return FALSE);
+ CRM_CHECK(target != NULL,
+ crm_log_xml_warn(action->xml, "BadAction");
+ return FALSE);
+
+ te_log_action(LOG_INFO,
+ "Executing standby operation (%s) on %s", id, target);
+
+ if (cib_ok > set_standby(fsa_cib_conn, uuid, XML_CIB_TAG_NODES,
"on")) {
+ crm_err("Cannot standby %s: set_standby() call failed.",
target);
+ }
+
+ crm_info("Skipping wait for %d", action->id);
+ action->confirmed = TRUE;
+ update_graph(graph, action);
+ trigger_graph();
+
+ return TRUE;
+}
+
static int get_target_rc(crm_action_t *action)
{
const char *target_rc_s = crm_meta_value(action->params,
XML_ATTR_TE_TARGET_RC);
@@ -500,7 +536,8 @@
te_pseudo_action,
te_rsc_command,
te_crm_command,
- te_fence_node
+ te_fence_node,
+ te_standby_node
};
void
diff -urN pacemaker-dev/include/crm/crm.h pacemaker-dev.mod/include/
crm/crm.h
--- pacemaker-dev/include/crm/crm.h 2008-11-26 10:47:46.000000000
+0900
+++ pacemaker-dev.mod/include/crm/crm.h 2008-11-26
10:48:47.000000000 +0900
@@ -146,6 +146,7 @@
#define CRM_OP_SHUTDOWN_REQ "req_shutdown"
#define CRM_OP_SHUTDOWN "do_shutdown"
#define CRM_OP_FENCE "stonith"
+#define CRM_OP_STANDBY "standby"
#define CRM_OP_EVENTCC "event_cc"
#define CRM_OP_TEABORT "te_abort"
#define CRM_OP_TEABORTED "te_abort_confirmed" /* we asked */
diff -urN pacemaker-dev/include/crm/pengine/common.h pacemaker-
dev.mod/include/crm/pengine/common.h
--- pacemaker-dev/include/crm/pengine/common.h 2008-11-26
10:47:46.000000000 +0900
+++ pacemaker-dev.mod/include/crm/pengine/common.h 2008-11-26
10:48:47.000000000 +0900
@@ -52,6 +52,7 @@
action_demote,
action_demoted,
shutdown_crm,
+ standby_node,
stonith_node
};
diff -urN pacemaker-dev/include/crm/pengine/status.h pacemaker-
dev.mod/include/crm/pengine/status.h
--- pacemaker-dev/include/crm/pengine/status.h 2008-11-26
10:47:46.000000000 +0900
+++ pacemaker-dev.mod/include/crm/pengine/status.h 2008-11-26
10:48:47.000000000 +0900
@@ -104,6 +104,7 @@
const char *uname;
gboolean online;
gboolean standby;
+ gboolean action_standby;
gboolean pending;
gboolean unclean;
gboolean shutdown;
diff -urN pacemaker-dev/include/crm/transition.h pacemaker-dev.mod/
include/crm/transition.h
--- pacemaker-dev/include/crm/transition.h 2008-11-26
10:47:46.000000000 +0900
+++ pacemaker-dev.mod/include/crm/transition.h 2008-11-26
10:48:47.000000000 +0900
@@ -115,6 +115,7 @@
gboolean (*rsc)(crm_graph_t *graph, crm_action_t *action);
gboolean (*crmd)(crm_graph_t *graph, crm_action_t *action);
gboolean (*stonith)(crm_graph_t *graph, crm_action_t *action);
+ gboolean (*standby)(crm_graph_t *graph, crm_action_t *action);
} crm_graph_functions_t;
enum transition_status {
diff -urN pacemaker-dev/lib/pengine/common.c pacemaker-dev.mod/lib/
pengine/common.c
--- pacemaker-dev/lib/pengine/common.c 2008-11-26 10:47:46.000000000
+0900
+++ pacemaker-dev.mod/lib/pengine/common.c 2008-11-26
10:48:47.000000000 +0900
@@ -178,6 +178,8 @@
return shutdown_crm;
} else if(safe_str_eq(task, CRM_OP_FENCE)) {
return stonith_node;
+ } else if(safe_str_eq(task, CRM_OP_STANDBY)) {
+ return standby_node;
} else if(safe_str_eq(task, CRMD_ACTION_STATUS)) {
return monitor_rsc;
} else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
@@ -245,6 +247,9 @@
case stonith_node:
result = CRM_OP_FENCE;
break;
+ case standby_node:
+ result = CRM_OP_STANDBY;
+ break;
case monitor_rsc:
result = CRMD_ACTION_STATUS;
break;
diff -urN pacemaker-dev/lib/pengine/unpack.c pacemaker-dev.mod/lib/
pengine/unpack.c
--- pacemaker-dev/lib/pengine/unpack.c 2008-11-26 10:47:46.000000000
+0900
+++ pacemaker-dev.mod/lib/pengine/unpack.c 2008-11-26
10:48:47.000000000 +0900
@@ -240,6 +240,7 @@
*/
new_node->details->unclean = TRUE;
}
+ new_node->details->action_standby = FALSE;
if(type == NULL
|| safe_str_eq(type, "member")
@@ -832,7 +833,7 @@
stop_action(rsc, node, FALSE);
} else if(on_fail == action_fail_standby) {
- node->details->standby = TRUE;
+ node->details->action_standby = TRUE;
} else if(on_fail == action_fail_block) {
/* is_managed == FALSE will prevent any
diff -urN pacemaker-dev/lib/transition/graph.c pacemaker-dev.mod/lib/
transition/graph.c
--- pacemaker-dev/lib/transition/graph.c 2008-11-26
10:47:46.000000000 +0900
+++ pacemaker-dev.mod/lib/transition/graph.c 2008-11-26
10:48:47.000000000 +0900
@@ -188,6 +188,11 @@
crm_debug_2("Executing STONITH-event: %d",
action->id);
return graph_fns->stonith(graph, action);
+
+ } else if(safe_str_eq(task, CRM_OP_STANDBY)) {
+ crm_debug_2("Executing STANDBY-event: %d",
+ action->id);
+ return graph_fns->standby(graph, action);
}
crm_debug_2("Executing crm-event: %d", action->id);
diff -urN pacemaker-dev/lib/transition/utils.c pacemaker-dev.mod/lib/
transition/utils.c
--- pacemaker-dev/lib/transition/utils.c 2008-11-26
10:47:46.000000000 +0900
+++ pacemaker-dev.mod/lib/transition/utils.c 2008-11-26
10:48:47.000000000 +0900
@@ -41,6 +41,7 @@
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy,
+ pseudo_action_dummy,
pseudo_action_dummy
};
@@ -61,6 +62,7 @@
CRM_ASSERT(graph_fns->crmd != NULL);
CRM_ASSERT(graph_fns->pseudo != NULL);
CRM_ASSERT(graph_fns->stonith != NULL);
+ CRM_ASSERT(graph_fns->standby != NULL);
}
const char *
diff -urN pacemaker-dev/pengine/allocate.c pacemaker-dev.mod/pengine/
allocate.c
--- pacemaker-dev/pengine/allocate.c 2008-11-26 10:47:46.000000000
+0900
+++ pacemaker-dev.mod/pengine/allocate.c 2008-11-26
10:48:47.000000000 +0900
@@ -774,6 +774,15 @@
last_stonith = stonith_op;
}
+ } else if(node->details->online &&
node->details->action_standby) {
+ action_t *standby_op = NULL;
+
+ standby_op = custom_action(
+ NULL, crm_strdup(CRM_OP_STANDBY),
+ CRM_OP_STANDBY, node, FALSE, TRUE, data_set);
+
+ order_actions(standby_op, all_stopped,
pe_order_implies_left);
+
} else if(node->details->online && node->details->shutdown) {
action_t *down_op = NULL;
crm_info("Scheduling Node %s for shutdown",
diff -urN pacemaker-dev/pengine/graph.c pacemaker-dev.mod/pengine/
graph.c
--- pacemaker-dev/pengine/graph.c 2008-11-26 10:47:46.000000000 +0900
+++ pacemaker-dev.mod/pengine/graph.c 2008-11-26 10:48:47.000000000
+0900
@@ -368,7 +368,10 @@
if(safe_str_eq(action->task, CRM_OP_FENCE)) {
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
/* needs_node_info = FALSE; */
-
+
+ } else if(safe_str_eq(action->task, CRM_OP_STANDBY)) {
+ action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
+
} else if(safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT);
diff -urN pacemaker-dev/pengine/group.c pacemaker-dev.mod/pengine/
group.c
--- pacemaker-dev/pengine/group.c 2008-11-26 10:47:46.000000000 +0900
+++ pacemaker-dev.mod/pengine/group.c 2008-11-26 10:48:47.000000000
+0900
@@ -423,6 +423,7 @@
case action_notified:
case shutdown_crm:
case stonith_node:
+ case standby_node:
break;
case stop_rsc:
case stopped_rsc:
diff -urN pacemaker-dev/pengine/utils.c pacemaker-dev.mod/pengine/
utils.c
--- pacemaker-dev/pengine/utils.c 2008-11-26 10:47:49.000000000 +0900
+++ pacemaker-dev.mod/pengine/utils.c 2008-11-26 10:49:54.000000000
+0900
@@ -338,6 +338,7 @@
case monitor_rsc:
case shutdown_crm:
case stonith_node:
+ case standby_node:
task = no_action;
break;
default:
@@ -430,6 +431,7 @@
switch(text2task(action->task)) {
case stonith_node:
+ case standby_node:
case shutdown_crm:
do_crm_log_unlikely(log_level,
"%s%s%sAction %d: %s%s%s%s%s%s",
<pe-warn-0.left.gif>
_______________________________________________
Pacemaker mailing list
[email protected]
http://list.clusterlabs.org/mailman/listinfo/pacemaker