On Fri, Feb 13, 2009 at 11:36:11AM +0100, Philipp Kern wrote:
> On Fri, Feb 13, 2009 at 10:54:02AM +0100, Luk Claes wrote:
> > Simon Horman wrote:
> > > I would like to upload a(nother) fresh version of heartbeat to fix
> > > a fairly severe bug. The fix has been in unstable since 2.1.3-7 and
> > > was included upstream at around the same time - it seems to be well
> > > tested.
> > Unfortunately this is too late to make it. Please request to include it
> > in r1 after the release, TIA.
>
> Oh and please attach a debdiff from the version in Lenny to the one with
> your fix to the bug report. Your upload should target
> 'stable-proposed-updates'. TIA.

Thanks, as Lenny has now been released I have made the upload. The debdiff is below and this email should record it in the BTS. -- Simon Horman VA Linux Systems Japan K.K., Sydney, Australia Satellite Office H: www.vergenet.net/~horms/ W: www.valinux.co.jp/en diff -u heartbeat-2.1.3/version.Debian heartbeat-2.1.3/version.Debian --- heartbeat-2.1.3/version.Debian +++ heartbeat-2.1.3/version.Debian @@ -1 +1 @@ -2.1.3-5 +2.1.3-6lenny1 diff -u heartbeat-2.1.3/debian/changelog heartbeat-2.1.3/debian/changelog --- heartbeat-2.1.3/debian/changelog +++ heartbeat-2.1.3/debian/changelog @@ -1,3 +1,14 @@ +heartbeat (2.1.3-6lenny1) stable-proposed-updates; urgency=low + + * dopd: fix basic failover; fix hb message corruption by fprintf(stderr) + Patch: fix-basic-failover-fix-hb-message-corruption-by-fprintf.patch + Upstream-Status: commit 47f60bebe7b25abd88ea7b5488e66dfe187416ae + "dopd: fix basic failover; fix hb message corruption by + fprintf(stderr)" + (closes: #486071) + + -- Simon Horman <ho...@debian.org> Mon, 16 Feb 2009 02:54:43 +0000 + heartbeat (2.1.3-6lenny0) testing-proposed-updates; urgency=low * heartbeat-gui dependancy on python-xml only in patch2: unchanged: --- heartbeat-2.1.3.orig/debian/patches/dopd-fix-basic-failover-fix-hb-message-corruption-by-fprintf_stderr_.patch +++ heartbeat-2.1.3/debian/patches/dopd-fix-basic-failover-fix-hb-message-corruption-by-fprintf_stderr_.patch @@ -0,0 +1,110 @@ +# HG changeset patch +# User Rasto Levrinc <ra...@linbit.com> +# Date 1206539836 -3600 +# Node ID 47f60bebe7b25abd88ea7b5488e66dfe187416ae +# Parent 17c0cf487322287d0689a036c32f21b900ce5a80 +dopd: fix basic failover; fix hb message corruption by fprintf(stderr) + +check_drbd_peer() used to return FALSE for "node name not in node list", +so drbd-peer-outdater returned "invalid nodename". +Then the semantic changed, and check_drbd_peer learned about "dead" peers +and returned FALSE for them as well. 
Which made basic failover impossible :( + +The return code was now changed to "peer unreachable" for a dead peer. +And even for nodes which really are not in the host list (and thus could be +classified as invalide), because, after all, thats what they are. +unreachable. + +Node name comparison needs to be case insensitive; fixed. + +During testing with 15 concurrent drbd resources several dopd crashes have been +observed, which after some debugging turned out to be simply a wrong assumption +about the global availability of stderr: some fprintf(stderr, "debug message") +had accidentally used the heartbeat communication channel file descriptor, +which seriously confused the comm layer. +All those fprintfs have now been changed to use cl_log. + +diff -r 17c0cf487322 -r 47f60bebe7b2 contrib/drbd-outdate-peer/dopd.c +--- a/contrib/drbd-outdate-peer/dopd.c Mon Mar 24 16:14:12 2008 +0100 ++++ b/contrib/drbd-outdate-peer/dopd.c Wed Mar 26 14:57:16 2008 +0100 +@@ -202,14 +202,17 @@ + } + + /* check_drbd_peer() +- * walk the nodes and return TRUE if peer is not this node and it exists. ++ * walk the nodes and return ++ * FALSE if peer is not found, not a "normal" node, or "dead" ++ * (no point in trying to reach those nodes). ++ * TRUE if peer is found to be alive and "normal". + */ + gboolean + check_drbd_peer(const char *drbd_peer) + { + const char *node; + gboolean found = FALSE; +- if (!strcmp(drbd_peer, node_name)) { ++ if (!strcasecmp(drbd_peer, node_name)) { + cl_log(LOG_WARNING, "drbd peer node %s is me!\n", drbd_peer); + return FALSE; + } +@@ -306,9 +309,9 @@ + } else + pthread_mutex_unlock(&conn_mutex); + } else { +- /* wrong peer was specified, +- send return code 20 to the client */ +- send_to_client(curr_client, "20"); ++ /* peer "dead" or not in node list. 
++ * return "peer could not be reached" */ ++ send_to_client(curr_client, "5"); + } + + ha_msg_del(msg); +diff -r 17c0cf487322 -r 47f60bebe7b2 contrib/drbd-outdate-peer/drbd-peer-outdater.c +--- a/contrib/drbd-outdate-peer/drbd-peer-outdater.c Mon Mar 24 16:14:12 2008 +0100 ++++ b/contrib/drbd-outdate-peer/drbd-peer-outdater.c Wed Mar 26 14:57:16 2008 +0100 +@@ -76,7 +76,7 @@ + + msg = msgfromIPC_noauth(server); + if (!msg) { +- fprintf(stderr, "no message from server or other " ++ cl_log(LOG_WARNING, "no message from server or other " + "instance is running\n"); + if (client->mainloop != NULL && + g_main_is_running(client->mainloop)) +@@ -92,7 +92,7 @@ + errno = 0; + rc = strtol(rc_string, &ep, 10); + if (errno != 0 || *ep != EOS) { +- fprintf(stderr, "unknown message: %s from server", rc_string); ++ cl_log(LOG_WARNING, "unknown message: %s from server", rc_string); + client->rc = 20; /* "officially undefined", unspecified error */ + ha_msg_del(msg); + if (client->mainloop != NULL && +@@ -124,7 +124,7 @@ + outdater_timeout_dispatch(gpointer user_data) + { + dop_client_t *client = (dop_client_t *)user_data; +- fprintf(stderr, "error: could not connect to dopd after %i seconds" ++ cl_log(LOG_WARNING, "error: could not connect to dopd after %i seconds" + ": timeout reached\n", client->timeout); + if (client->mainloop != NULL && g_main_is_running(client->mainloop)) + g_main_quit(client->mainloop); +@@ -255,7 +255,7 @@ + (gpointer)new_client, &ipc_server); + + if (ipc_server == NULL) { +- fprintf(stderr, "Could not connect to "T_OUTDATER" channel\n"); ++ cl_log(LOG_WARNING, "Could not connect to "T_OUTDATER" channel\n"); + dop_exit(new_client); /* unreachable */ + } + +@@ -267,7 +267,7 @@ + ha_msg_add(update, F_OUTDATER_RES, drbd_resource); + + if (msg2ipcchan(update, ipc_server) != HA_OK) { +- fprintf(stderr, "Could not send message\n"); ++ cl_log(LOG_WARNING, "Could not send message\n"); + dop_exit(new_client); + } + only in patch2: unchanged: --- 
heartbeat-2.1.3.orig/debian/patches/series/2.1.3-6lenny1 +++ heartbeat-2.1.3/debian/patches/series/2.1.3-6lenny1 @@ -0,0 +1 @@ ++ dopd-fix-basic-failover-fix-hb-message-corruption-by-fprintf_stderr_.patch only in patch2: unchanged: --- heartbeat-2.1.3.orig/contrib/drbd-outdate-peer/drbd-peer-outdater.c +++ heartbeat-2.1.3/contrib/drbd-outdate-peer/drbd-peer-outdater.c @@ -76,7 +76,7 @@ msg = msgfromIPC_noauth(server); if (!msg) { - fprintf(stderr, "no message from server or other " + cl_log(LOG_WARNING, "no message from server or other " "instance is running\n"); if (client->mainloop != NULL && g_main_is_running(client->mainloop)) @@ -92,7 +92,7 @@ errno = 0; rc = strtol(rc_string, &ep, 10); if (errno != 0 || *ep != EOS) { - fprintf(stderr, "unknown message: %s from server", rc_string); + cl_log(LOG_WARNING, "unknown message: %s from server", rc_string); client->rc = 20; /* "officially undefined", unspecified error */ ha_msg_del(msg); if (client->mainloop != NULL && @@ -124,7 +124,7 @@ outdater_timeout_dispatch(gpointer user_data) { dop_client_t *client = (dop_client_t *)user_data; - fprintf(stderr, "error: could not connect to dopd after %i seconds" + cl_log(LOG_WARNING, "error: could not connect to dopd after %i seconds" ": timeout reached\n", client->timeout); if (client->mainloop != NULL && g_main_is_running(client->mainloop)) g_main_quit(client->mainloop); @@ -255,7 +255,7 @@ (gpointer)new_client, &ipc_server); if (ipc_server == NULL) { - fprintf(stderr, "Could not connect to "T_OUTDATER" channel\n"); + cl_log(LOG_WARNING, "Could not connect to "T_OUTDATER" channel\n"); dop_exit(new_client); /* unreachable */ } @@ -267,7 +267,7 @@ ha_msg_add(update, F_OUTDATER_RES, drbd_resource); if (msg2ipcchan(update, ipc_server) != HA_OK) { - fprintf(stderr, "Could not send message\n"); + cl_log(LOG_WARNING, "Could not send message\n"); dop_exit(new_client); } only in patch2: unchanged: --- heartbeat-2.1.3.orig/contrib/drbd-outdate-peer/dopd.c +++ 
heartbeat-2.1.3/contrib/drbd-outdate-peer/dopd.c @@ -202,14 +202,17 @@ } /* check_drbd_peer() - * walk the nodes and return TRUE if peer is not this node and it exists. + * walk the nodes and return + * FALSE if peer is not found, not a "normal" node, or "dead" + * (no point in trying to reach those nodes). + * TRUE if peer is found to be alive and "normal". */ gboolean check_drbd_peer(const char *drbd_peer) { const char *node; gboolean found = FALSE; - if (!strcmp(drbd_peer, node_name)) { + if (!strcasecmp(drbd_peer, node_name)) { cl_log(LOG_WARNING, "drbd peer node %s is me!\n", drbd_peer); return FALSE; } @@ -306,9 +309,9 @@ } else pthread_mutex_unlock(&conn_mutex); } else { - /* wrong peer was specified, - send return code 20 to the client */ - send_to_client(curr_client, "20"); + /* peer "dead" or not in node list. + * return "peer could not be reached" */ + send_to_client(curr_client, "5"); } ha_msg_del(msg); -- To UNSUBSCRIBE, email to debian-bugs-dist-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org