On Wed, Apr 20, 2022 at 11:22:27PM +0200, Alexandr Nedvedicky wrote: > updated diff is below
OK bluhm@ You have to merge again, as I removed #ifdef PFSYNC_DEBUG and added a #ifdef DIAGNOSTIC. Sorry. > --------8<---------------8<---------------8<------------------8<-------- > diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c > index fc6843b541f..3061318cec9 100644 > --- a/sys/net/if_pfsync.c > +++ b/sys/net/if_pfsync.c > @@ -181,6 +181,7 @@ void pfsync_q_del(struct pf_state *); > > struct pfsync_upd_req_item { > TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; > + TAILQ_ENTRY(pfsync_upd_req_item) ur_snap; > struct pfsync_upd_req ur_msg; > }; > TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); > @@ -295,7 +296,7 @@ void pfsync_bulk_update(void *); > void pfsync_bulk_fail(void *); > > void pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *); > -void pfsync_drop_snapshot(struct pfsync_snapshot *, struct pfsync_softc *); > +void pfsync_drop_snapshot(struct pfsync_snapshot *); > > void pfsync_send_dispatch(void *); > void pfsync_send_pkt(struct mbuf *); > @@ -422,8 +423,7 @@ pfsync_clone_destroy(struct ifnet *ifp) > sc->sc_deferred = 0; > mtx_leave(&sc->sc_deferrals_mtx); > > - while (!TAILQ_EMPTY(&deferrals)) { > - pd = TAILQ_FIRST(&deferrals); > + while ((pd = TAILQ_FIRST(&deferrals)) != NULL) { > TAILQ_REMOVE(&deferrals, pd, pd_entry); > pfsync_undefer(pd, 0); > } > @@ -1574,6 +1574,9 @@ void > pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc) > { > int q; > + struct pf_state *st; > + struct pfsync_upd_req_item *ur; > + struct tdb *tdb; > > sn->sn_sc = sc; > > @@ -1583,14 +1586,31 @@ pfsync_grab_snapshot(struct pfsync_snapshot *sn, > struct pfsync_softc *sc) > > for (q = 0; q < PFSYNC_S_COUNT; q++) { > TAILQ_INIT(&sn->sn_qs[q]); > - TAILQ_CONCAT(&sn->sn_qs[q], &sc->sc_qs[q], sync_list); > + > + while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { > + KASSERT(st->snapped == 0); > + TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); > + TAILQ_INSERT_TAIL(&sn->sn_qs[q], st, sync_snap); > + st->snapped = 1; > + } > } > > TAILQ_INIT(&sn->sn_upd_req_list); > - TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry); > + while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { > + TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); > + TAILQ_INSERT_TAIL(&sn->sn_upd_req_list, ur, ur_snap); > + } > > TAILQ_INIT(&sn->sn_tdb_q); > - TAILQ_CONCAT(&sn->sn_tdb_q, &sc->sc_tdb_q, tdb_sync_entry); > + while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { > + TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry); > + TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap); > + > + mtx_enter(&tdb->tdb_mtx); > + KASSERT(!ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED)); > + SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); > + mtx_leave(&tdb->tdb_mtx); > + } > > sn->sn_len = sc->sc_len; > sc->sc_len = PFSYNC_MINPKT; > @@ -1606,41 +1626,40 @@ pfsync_grab_snapshot(struct pfsync_snapshot *sn, > struct pfsync_softc *sc) > } > > void > -pfsync_drop_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc * sc) > +pfsync_drop_snapshot(struct pfsync_snapshot *sn) > { > struct pf_state *st; > struct pfsync_upd_req_item *ur; > struct tdb *t; > int q; > > - > for (q = 0; q < PFSYNC_S_COUNT; q++) { > if (TAILQ_EMPTY(&sn->sn_qs[q])) > continue; > > while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) { > - TAILQ_REMOVE(&sn->sn_qs[q], st, sync_list); > -#ifdef PFSYNC_DEBUG > KASSERT(st->sync_state == q); > -#endif > + KASSERT(st->snapped == 1); > + TAILQ_REMOVE(&sn->sn_qs[q], st, sync_snap); > st->sync_state = PFSYNC_S_NONE; > + st->snapped = 0; > pf_state_unref(st); > } > } > > while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) { > - TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_entry); > + TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_snap); > pool_put(&sn->sn_sc->sc_pool, ur); > } > > - mtx_enter(&sc->sc_tdb_mtx); > while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) { > - TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_entry); > + TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap); > mtx_enter(&t->tdb_mtx); > + KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)); > + CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED); > CLR(t->tdb_flags, TDBF_PFSYNC); > mtx_leave(&t->tdb_mtx); > } > - mtx_leave(&sc->sc_tdb_mtx); > } > > int > @@ -1667,7 +1686,7 @@ pfsync_drop(struct pfsync_softc *sc) > struct pfsync_snapshot sn; > > pfsync_grab_snapshot(&sn, sc); > - pfsync_drop_snapshot(&sn, sc); > + pfsync_drop_snapshot(&sn); > } > > void > @@ -1759,7 +1778,7 @@ pfsync_sendout(void) > if (m == NULL) { > sc->sc_if.if_oerrors++; > pfsyncstat_inc(pfsyncs_onomem); > - pfsync_drop_snapshot(&sn, sc); > + pfsync_drop_snapshot(&sn); > return; > } > > @@ -1769,7 +1788,7 @@ pfsync_sendout(void) > m_free(m); > sc->sc_if.if_oerrors++; > pfsyncstat_inc(pfsyncs_onomem); > - pfsync_drop_snapshot(&sn, sc); > + pfsync_drop_snapshot(&sn); > return; > } > } > @@ -1799,7 +1818,7 @@ pfsync_sendout(void) > > count = 0; > while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) { > - TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_entry); > + TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_snap); > > bcopy(&ur->ur_msg, m->m_data + offset, > sizeof(ur->ur_msg)); > @@ -1827,18 +1846,19 @@ pfsync_sendout(void) > subh = (struct pfsync_subheader *)(m->m_data + offset); > offset += sizeof(*subh); > > - mtx_enter(&sc->sc_tdb_mtx); > count = 0; > while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) { > - TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry); > + TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap); > pfsync_out_tdb(t, m->m_data + offset); > offset += sizeof(struct pfsync_tdb); > mtx_enter(&t->tdb_mtx); > + KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)); > + CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED); > CLR(t->tdb_flags, TDBF_PFSYNC); > mtx_leave(&t->tdb_mtx); > + tdb_unref(t); > count++; > } > - mtx_leave(&sc->sc_tdb_mtx); > > bzero(subh, sizeof(*subh)); > subh->action = PFSYNC_ACT_TDB; > @@ -1856,11 +1876,11 @@ pfsync_sendout(void) > > count = 0; > while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) { > - TAILQ_REMOVE(&sn.sn_qs[q], st, sync_list); > -#ifdef PFSYNC_DEBUG > + TAILQ_REMOVE(&sn.sn_qs[q], st, sync_snap); > KASSERT(st->sync_state == q); > -#endif > + KASSERT(st->snapped == 1); > st->sync_state = PFSYNC_S_NONE; > + st->snapped = 0; > pfsync_qs[q].write(st, m->m_data + offset); > offset += pfsync_qs[q].len; > > @@ -1916,9 +1936,7 @@ pfsync_insert_state(struct pf_state *st) > ISSET(st->state_flags, PFSTATE_NOSYNC)) > return; > > -#ifdef PFSYNC_DEBUG > KASSERT(st->sync_state == PFSYNC_S_NONE); > -#endif > > if (sc->sc_len == PFSYNC_MINPKT) > timeout_add_sec(&sc->sc_tmo, 1); > @@ -2403,16 +2421,15 @@ pfsync_q_ins(struct pf_state *st, int q) > struct pfsync_softc *sc = pfsyncif; > size_t nlen, sclen; > > -#if defined(PFSYNC_DEBUG) > if (sc->sc_len < PFSYNC_MINPKT) > panic("pfsync pkt len is too low %zd", sc->sc_len); > -#endif > do { > mtx_enter(&sc->sc_st_mtx); > > /* > - * If two threads are competing to insert the same state, then > - * there must be just single winner. > + * There are either two threads trying to update the > + * the same state, or the state is just being processed > + * (is on snapshot queue). > */ > if (st->sync_state != PFSYNC_S_NONE) { > mtx_leave(&sc->sc_st_mtx); > @@ -2450,6 +2467,15 @@ pfsync_q_del(struct pf_state *st) > > mtx_enter(&sc->sc_st_mtx); > q = st->sync_state; > + /* > + * re-check under mutex > + * if state is snapped already, then just bail out, because we came > + * too late, the state is being just processed/dispatched to peer. > + */ > + if ((q == PFSYNC_S_NONE) || (st->snapped)) { > + mtx_leave(&sc->sc_st_mtx); > + return; > + } > atomic_sub_long(&sc->sc_len, pfsync_qs[q].len); > TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); > if (TAILQ_EMPTY(&sc->sc_qs[q])) > @@ -2495,6 +2521,7 @@ pfsync_update_tdb(struct tdb *t, int output) > } > > TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); > + tdb_ref(t); > SET(t->tdb_flags, TDBF_PFSYNC); > mtx_leave(&t->tdb_mtx); > > @@ -2525,7 +2552,17 @@ pfsync_delete_tdb(struct tdb *t) > > mtx_enter(&sc->sc_tdb_mtx); > > + /* > + * if tdb entry is just being processed (found in snapshot), > + * then it can not be deleted. we just came too late > + */ > + if (ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)) { > + mtx_leave(&sc->sc_tdb_mtx); > + return; > + } > + > TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); > + > mtx_enter(&t->tdb_mtx); > CLR(t->tdb_flags, TDBF_PFSYNC); > mtx_leave(&t->tdb_mtx); > @@ -2536,6 +2573,8 @@ pfsync_delete_tdb(struct tdb *t) > atomic_sub_long(&sc->sc_len, nlen); > > mtx_leave(&sc->sc_tdb_mtx); > + > + tdb_unref(t); > } > > void > diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h > index bd7ec1d88e7..558618a0f14 100644 > --- a/sys/net/pfvar.h > +++ b/sys/net/pfvar.h > @@ -749,6 +749,7 @@ struct pf_state { > u_int8_t pad[3]; > > TAILQ_ENTRY(pf_state) sync_list; > + TAILQ_ENTRY(pf_state) sync_snap; > TAILQ_ENTRY(pf_state) entry_list; > SLIST_ENTRY(pf_state) gc_list; > RB_ENTRY(pf_state) entry_id; > @@ -797,6 +798,7 @@ struct pf_state { > pf_refcnt_t refcnt; > u_int16_t delay; > u_int8_t rt; > + u_int8_t snapped; > }; > > /* > diff --git a/sys/netinet/ip_ipsp.h b/sys/netinet/ip_ipsp.h > index c697994047b..fa46a1e1282 100644 > --- a/sys/netinet/ip_ipsp.h > +++ b/sys/netinet/ip_ipsp.h > @@ -355,6 +355,7 @@ struct tdb { /* tunnel > descriptor block */ > #define TDBF_PFSYNC 0x40000 /* TDB will be synced */ > #define TDBF_PFSYNC_RPL 0x80000 /* Replay counter should be > bumped */ > #define TDBF_ESN 0x100000 /* 64-bit sequence numbers > (ESN) */ > +#define TDBF_PFSYNC_SNAPPED 0x200000 /* entry is being dispatched > to peer */ > > #define TDBF_BITS ("\20" \ > "\1UNIQUE\2TIMER\3BYTES\4ALLOCATIONS" \ > @@ -439,6 +440,7 @@ struct tdb { /* tunnel > descriptor block */ > > TAILQ_HEAD(tdb_policy_head, ipsec_policy) tdb_policy_head; /* [p] */ > TAILQ_ENTRY(tdb) tdb_sync_entry; > + TAILQ_ENTRY(tdb) tdb_sync_snap; > }; > > enum tdb_counters {