hi, one of the last pfsync "improvements" (r1.180) by yours truly introduced some nasty regressions for those running pfsync over the crossover cable. i apologize for all the inconveniences. peter hessler, kapeatanakis giannis and myself have been trying very hard to get this fixed.
we have identified several problems introduced by the change: 1) failover when backup is rebooted: master has it's link down so it's demoted, backup comes up, its link comes up, master requests bulk update and demotes by 1 to 2, backup requests bulk update and its demotion counter becomes 129; then rc undemotes it by 128 and it stays at 1 until bulk update finishes. here it preempts w/o having all the state info from the master. 2) race between link state update on master and backup when plugging back the cable can trigger failovers: because of the demotion dance on the "link up" event, master can notice the link coming up faster then the backup and request a bulk update which would demote it further and trigger a failover. when backup notices the link it'll demote itself to the same level as the master and failover back again, but this flipping will produce a considerable disruption in the traffic flow. to address this two problems we have developed two mechanisms: 1) initial demotion by 32 (arbitrary large number) on the first bulk update that will last longer than the rc demotion and prevent failovers w/o having the full state table. 2) not doing any demotion adjustments on the link up event and treating a link down demotion as the bulk update start one and undemoting when bulk update finishes (either normally or with a timeout. this change was thorously tested by phesseler and kapeatanakis. more tests and oks are welcome. this is a bit newer version of the diff that has the bulk update cancelation code factored out in a separate function pfsync_cancel_full_update for readability. it's also reused in the code bringing the interface down. diff --git sys/net/if_pfsync.c sys/net/if_pfsync.c index 534c9d3..60f9939 100644 --- sys/net/if_pfsync.c +++ sys/net/if_pfsync.c @@ -210,6 +210,9 @@ struct pfsync_softc { struct pfsync_upd_reqs sc_upd_req_list; + int sc_initial_bulk; + int sc_link_demoted; + int sc_defer; struct pfsync_deferrals sc_deferrals; u_int sc_deferred; @@ -254,6 +257,7 @@ void pfsync_deferred(struct pf_state *, int); void pfsync_undefer(struct pfsync_deferral *, int); void pfsync_defer_tmo(void *); +void pfsync_cancel_full_update(struct pfsync_softc *); void pfsync_request_full_update(struct pfsync_softc *); void pfsync_request_update(u_int32_t, u_int64_t); void pfsync_update_state_req(struct pf_state *); @@ -357,6 +361,8 @@ pfsync_clone_destroy(struct ifnet *ifp) #if NCARP > 0 if (!pfsync_sync_ok) carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); + if (sc->sc_link_demoted) + carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); #endif if (sc->sc_lhcookie != NULL) hook_disestablish( @@ -414,29 +420,28 @@ pfsync_syncdev_state(void *arg) { struct pfsync_softc *sc = arg; - if (!sc->sc_sync_if) + if (!sc->sc_sync_if && !(sc->sc_if.if_flags & IFF_UP)) return; - if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN || - !(sc->sc_sync_if->if_flags & IFF_UP)) { + if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) { sc->sc_if.if_flags &= ~IFF_RUNNING; + if (!sc->sc_link_demoted) { #if NCARP > 0 - carp_group_demote_adj(&sc->sc_if, 1, "pfsyncdev"); + carp_group_demote_adj(&sc->sc_if, 1, + "pfsync link state down"); #endif + sc->sc_link_demoted = 1; + } + /* drop everything */ timeout_del(&sc->sc_tmo); pfsync_drop(sc); - /* cancel bulk update */ - timeout_del(&sc->sc_bulk_tmo); - sc->sc_bulk_next = NULL; - sc->sc_bulk_last = NULL; - } else { + pfsync_cancel_full_update(sc); + } else if (sc->sc_link_demoted) { sc->sc_if.if_flags |= IFF_RUNNING; + pfsync_request_full_update(sc); -#if NCARP > 0 - carp_group_demote_adj(&sc->sc_if, -1, "pfsyncdev"); -#endif } } @@ -1149,9 +1154,17 @@ pfsync_in_bus(caddr_t buf, int len, int count, int flags) #if NCARP > 0 if (!pfsync_sync_ok) carp_group_demote_adj(&sc->sc_if, -1, + sc->sc_link_demoted ? + "pfsync link state up" : "pfsync bulk done"); + if (sc->sc_initial_bulk) { + carp_group_demote_adj(&sc->sc_if, -32, + "pfsync init"); + sc->sc_initial_bulk = 0; + } #endif pfsync_sync_ok = 1; + sc->sc_link_demoted = 0; DPFPRINTF(LOG_INFO, "received valid bulk update end"); } else { DPFPRINTF(LOG_WARNING, "received invalid " @@ -1270,20 +1283,27 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if ((ifp->if_flags & IFF_RUNNING) == 0 && (ifp->if_flags & IFF_UP)) { ifp->if_flags |= IFF_RUNNING; + + sc->sc_initial_bulk = 1; + carp_group_demote_adj(&sc->sc_if, 32, "pfsync init"); + pfsync_request_full_update(sc); } if ((ifp->if_flags & IFF_RUNNING) && (ifp->if_flags & IFF_UP) == 0) { ifp->if_flags &= ~IFF_RUNNING; + if (sc->sc_initial_bulk) { + carp_group_demote_adj(&sc->sc_if, -32, + "pfsync init"); + sc->sc_initial_bulk = 0; + } + /* drop everything */ timeout_del(&sc->sc_tmo); pfsync_drop(sc); - /* cancel bulk update */ - timeout_del(&sc->sc_bulk_tmo); - sc->sc_bulk_next = NULL; - sc->sc_bulk_last = NULL; + pfsync_cancel_full_update(sc); } splx(s); break; @@ -1880,13 +1900,27 @@ pfsync_update_state(struct pf_state *st) } void +pfsync_cancel_full_update(struct pfsync_softc *sc) +{ + if (timeout_pending(&sc->sc_bulkfail_tmo) || + timeout_pending(&sc->sc_bulk_tmo)) + DPFPRINTF(LOG_INFO, "cancelling bulk update"); + timeout_del(&sc->sc_bulkfail_tmo); + timeout_del(&sc->sc_bulk_tmo); + sc->sc_bulk_next = NULL; + sc->sc_bulk_last = NULL; + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; +} + +void pfsync_request_full_update(struct pfsync_softc *sc) { if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) { /* Request a full state table update. */ sc->sc_ureq_sent = time_uptime; #if NCARP > 0 - if (pfsync_sync_ok) + if (!sc->sc_link_demoted && pfsync_sync_ok) carp_group_demote_adj(&sc->sc_if, 1, "pfsync bulk start"); #endif @@ -2274,9 +2308,17 @@ pfsync_bulk_fail(void *arg) #if NCARP > 0 if (!pfsync_sync_ok) carp_group_demote_adj(&sc->sc_if, -1, + sc->sc_link_demoted ? + "pfsync link state up" : "pfsync bulk fail"); + if (sc->sc_initial_bulk) { + carp_group_demote_adj(&sc->sc_if, -32, + "pfsync init"); + sc->sc_initial_bulk = 0; + } #endif pfsync_sync_ok = 1; + sc->sc_link_demoted = 0; DPFPRINTF(LOG_ERR, "failed to receive bulk update"); }