hi,

one of the last pfsync "improvements" (r1.180) by yours truly
introduced some nasty regressions for those running pfsync over
the crossover cable.  i apologize for all the inconveniences.
peter hessler, kapeatanakis giannis and myself have been trying
very hard to get this fixed.

we have identified several problems introduced by the change:

1) failover when backup is rebooted: master has it's link down
   so it's demoted, backup comes up, its link comes up, master
   requests bulk update and demotes by 1 to 2, backup requests
   bulk update and its demotion counter becomes 129; then rc
   undemotes it by 128 and it stays at 1 until bulk update
   finishes.  here it preempts w/o having all the state info
   from the master.

2) race between link state update on master and backup when
   plugging back the cable can trigger failovers: because of
   the demotion dance on the "link up" event, master can
   notice the link coming up faster then the backup and request
   a bulk update which would demote it further and trigger a
   failover.  when backup notices the link it'll demote itself
   to the same level as the master and failover back again, but
   this flipping will produce a considerable disruption in the
   traffic flow.

to address this two problems we have developed two mechanisms:

1) initial demotion by 32 (arbitrary large number) on the first
   bulk update that will last longer than the rc demotion and
   prevent failovers w/o having the full state table.

2) not doing any demotion adjustments on the link up event and
   treating a link down demotion as the bulk update start one
   and undemoting when bulk update finishes (either normally or
   with a timeout.

this change was thorously tested by phesseler and kapeatanakis.
more tests and oks are welcome.

this is a bit newer version of the diff that has the bulk update
cancelation code factored out in a separate function
pfsync_cancel_full_update for readability.  it's also reused in
the code bringing the interface down.

diff --git sys/net/if_pfsync.c sys/net/if_pfsync.c
index 534c9d3..60f9939 100644
--- sys/net/if_pfsync.c
+++ sys/net/if_pfsync.c
@@ -210,6 +210,9 @@ struct pfsync_softc {
 
        struct pfsync_upd_reqs   sc_upd_req_list;
 
+       int                      sc_initial_bulk;
+       int                      sc_link_demoted;
+
        int                      sc_defer;
        struct pfsync_deferrals  sc_deferrals;
        u_int                    sc_deferred;
@@ -254,6 +257,7 @@ void        pfsync_deferred(struct pf_state *, int);
 void   pfsync_undefer(struct pfsync_deferral *, int);
 void   pfsync_defer_tmo(void *);
 
+void   pfsync_cancel_full_update(struct pfsync_softc *);
 void   pfsync_request_full_update(struct pfsync_softc *);
 void   pfsync_request_update(u_int32_t, u_int64_t);
 void   pfsync_update_state_req(struct pf_state *);
@@ -357,6 +361,8 @@ pfsync_clone_destroy(struct ifnet *ifp)
 #if NCARP > 0
        if (!pfsync_sync_ok)
                carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
+       if (sc->sc_link_demoted)
+               carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
 #endif
        if (sc->sc_lhcookie != NULL)
                hook_disestablish(
@@ -414,29 +420,28 @@ pfsync_syncdev_state(void *arg)
 {
        struct pfsync_softc *sc = arg;
 
-       if (!sc->sc_sync_if)
+       if (!sc->sc_sync_if && !(sc->sc_if.if_flags & IFF_UP))
                return;
 
-       if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN ||
-           !(sc->sc_sync_if->if_flags & IFF_UP)) {
+       if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
                sc->sc_if.if_flags &= ~IFF_RUNNING;
+               if (!sc->sc_link_demoted) {
 #if NCARP > 0
-               carp_group_demote_adj(&sc->sc_if, 1, "pfsyncdev");
+                       carp_group_demote_adj(&sc->sc_if, 1,
+                           "pfsync link state down");
 #endif
+                       sc->sc_link_demoted = 1;
+               }
+
                /* drop everything */
                timeout_del(&sc->sc_tmo);
                pfsync_drop(sc);
 
-               /* cancel bulk update */
-               timeout_del(&sc->sc_bulk_tmo);
-               sc->sc_bulk_next = NULL;
-               sc->sc_bulk_last = NULL;
-       } else {
+               pfsync_cancel_full_update(sc);
+       } else if (sc->sc_link_demoted) {
                sc->sc_if.if_flags |= IFF_RUNNING;
+
                pfsync_request_full_update(sc);
-#if NCARP > 0
-               carp_group_demote_adj(&sc->sc_if, -1, "pfsyncdev");
-#endif
        }
 }
 
@@ -1149,9 +1154,17 @@ pfsync_in_bus(caddr_t buf, int len, int count, int flags)
 #if NCARP > 0
                        if (!pfsync_sync_ok)
                                carp_group_demote_adj(&sc->sc_if, -1,
+                                   sc->sc_link_demoted ?
+                                   "pfsync link state up" :
                                    "pfsync bulk done");
+                       if (sc->sc_initial_bulk) {
+                               carp_group_demote_adj(&sc->sc_if, -32,
+                                   "pfsync init");
+                               sc->sc_initial_bulk = 0;
+                       }
 #endif
                        pfsync_sync_ok = 1;
+                       sc->sc_link_demoted = 0;
                        DPFPRINTF(LOG_INFO, "received valid bulk update end");
                } else {
                        DPFPRINTF(LOG_WARNING, "received invalid "
@@ -1270,20 +1283,27 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                if ((ifp->if_flags & IFF_RUNNING) == 0 &&
                    (ifp->if_flags & IFF_UP)) {
                        ifp->if_flags |= IFF_RUNNING;
+
+                       sc->sc_initial_bulk = 1;
+                       carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
+
                        pfsync_request_full_update(sc);
                }
                if ((ifp->if_flags & IFF_RUNNING) &&
                    (ifp->if_flags & IFF_UP) == 0) {
                        ifp->if_flags &= ~IFF_RUNNING;
 
+                       if (sc->sc_initial_bulk) {
+                               carp_group_demote_adj(&sc->sc_if, -32,
+                                   "pfsync init");
+                               sc->sc_initial_bulk = 0;
+                       }
+
                        /* drop everything */
                        timeout_del(&sc->sc_tmo);
                        pfsync_drop(sc);
 
-                       /* cancel bulk update */
-                       timeout_del(&sc->sc_bulk_tmo);
-                       sc->sc_bulk_next = NULL;
-                       sc->sc_bulk_last = NULL;
+                       pfsync_cancel_full_update(sc);
                }
                splx(s);
                break;
@@ -1880,13 +1900,27 @@ pfsync_update_state(struct pf_state *st)
 }
 
 void
+pfsync_cancel_full_update(struct pfsync_softc *sc)
+{
+       if (timeout_pending(&sc->sc_bulkfail_tmo) ||
+           timeout_pending(&sc->sc_bulk_tmo))
+               DPFPRINTF(LOG_INFO, "cancelling bulk update");
+       timeout_del(&sc->sc_bulkfail_tmo);
+       timeout_del(&sc->sc_bulk_tmo);
+       sc->sc_bulk_next = NULL;
+       sc->sc_bulk_last = NULL;
+       sc->sc_ureq_sent = 0;
+       sc->sc_bulk_tries = 0;
+}
+
+void
 pfsync_request_full_update(struct pfsync_softc *sc)
 {
        if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
                /* Request a full state table update. */
                sc->sc_ureq_sent = time_uptime;
 #if NCARP > 0
-               if (pfsync_sync_ok)
+               if (!sc->sc_link_demoted && pfsync_sync_ok)
                        carp_group_demote_adj(&sc->sc_if, 1,
                            "pfsync bulk start");
 #endif
@@ -2274,9 +2308,17 @@ pfsync_bulk_fail(void *arg)
 #if NCARP > 0
                if (!pfsync_sync_ok)
                        carp_group_demote_adj(&sc->sc_if, -1,
+                           sc->sc_link_demoted ?
+                           "pfsync link state up" :
                            "pfsync bulk fail");
+               if (sc->sc_initial_bulk) {
+                       carp_group_demote_adj(&sc->sc_if, -32,
+                           "pfsync init");
+                       sc->sc_initial_bulk = 0;
+               }
 #endif
                pfsync_sync_ok = 1;
+               sc->sc_link_demoted = 0;
                DPFPRINTF(LOG_ERR, "failed to receive bulk update");
        }

Reply via email to