Hello,

thank you for taking a look at it.

</snip>
> 
> I think there is a use after free in your diff.  After you return
> from pfsync_delete_tdb() you must not access the TDB again.
> 
> Comments inline.
> 

</snip>
> >
> >     TAILQ_INIT(&sn->sn_upd_req_list);
> > -   TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry);
> > +   while (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
> > +           ur = TAILQ_FIRST(&sc->sc_upd_req_list);
> 
> Other loops have this idiom
>         while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
> 

    The new diff version uses the 'TAILQ_FIRST()' idiom.
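
    For illustration, a minimal userland sketch of that drain idiom is
    below (the struct and queue names are made up, not the pfsync ones).
    Draining with TAILQ_FIRST()/TAILQ_REMOVE() instead of TAILQ_CONCAT()
    is what lets each element be re-linked on a separate snapshot entry:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
        int                     value;
        TAILQ_ENTRY(item)       entry;  /* linkage in the source queue */
        TAILQ_ENTRY(item)       snap;   /* linkage in the snapshot queue */
};
TAILQ_HEAD(itemq, item);

int
main(void)
{
        struct itemq src = TAILQ_HEAD_INITIALIZER(src);
        struct itemq snapshot = TAILQ_HEAD_INITIALIZER(snapshot);
        struct item *it;
        int i;

        for (i = 0; i < 3; i++) {
                it = calloc(1, sizeof(*it));
                it->value = i;
                TAILQ_INSERT_TAIL(&src, it, entry);
        }

        /* drain one element at a time, moving it onto the other linkage */
        while ((it = TAILQ_FIRST(&src)) != NULL) {
                TAILQ_REMOVE(&src, it, entry);
                TAILQ_INSERT_TAIL(&snapshot, it, snap);
        }

        TAILQ_FOREACH(it, &snapshot, snap)
                printf("snapshotted %d\n", it->value);

        return 0;
}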

</snip>
> > @@ -1827,18 +1853,20 @@ pfsync_sendout(void)
> >             subh = (struct pfsync_subheader *)(m->m_data + offset);
> >             offset += sizeof(*subh);
> >
> > -           mtx_enter(&sc->sc_tdb_mtx);
> >             count = 0;
> >             while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
> > -                   TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry);
> > +                   TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap);
> 
> I think the TDB in tdb_sync_snap list may be freed.  Maybe
> you should grab a refcount if you store them into a list.
> 

    I see. pfsync must grab a reference to the tdb in pfsync_update_tdb(),
    where the entry is inserted into the queue. The new diff fixes that.
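
    To spell out the intent, here is a simplified userland model (not the
    kernel code; obj_ref()/obj_unref() are stand-ins for
    tdb_ref()/tdb_unref()): the sync queue owns one reference to every
    object it links, so a queued entry cannot be freed underneath
    pfsync(4):

#include <sys/queue.h>
#include <assert.h>
#include <stdlib.h>

struct obj {
        int                     refcnt;
        TAILQ_ENTRY(obj)        sync_entry;
};
TAILQ_HEAD(objq, obj);

static void
obj_ref(struct obj *o)
{
        o->refcnt++;
}

static void
obj_unref(struct obj *o)
{
        assert(o->refcnt > 0);
        if (--o->refcnt == 0)
                free(o);
}

/* queue the object for sync; the queue takes its own reference */
static void
sync_enqueue(struct objq *q, struct obj *o)
{
        TAILQ_INSERT_TAIL(q, o, sync_entry);
        obj_ref(o);
}

int
main(void)
{
        struct objq q = TAILQ_HEAD_INITIALIZER(q);
        struct obj *o = calloc(1, sizeof(*o));

        o->refcnt = 1;                  /* creator's reference */
        sync_enqueue(&q, o);            /* refcnt == 2: the queue holds one too */

        obj_unref(o);                   /* creator's reference gone ... */
        assert(TAILQ_FIRST(&q) == o);   /* ... but the queued object survives */

        TAILQ_REMOVE(&q, o, sync_entry);
        obj_unref(o);                   /* drop the queue's reference; object is freed */
        return 0;
}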

> >                     pfsync_out_tdb(t, m->m_data + offset);
> >                     offset += sizeof(struct pfsync_tdb);
> > +#ifdef PFSYNC_DEBUG
> > +                   KASSERT(t->tdb_snapped == 1);
> > +#endif
> > +                   t->tdb_snapped = 0;
> 
> This may also be use after free.

    The new diff drops the reference as soon as pfsync(4) dispatches
    the tdb into the pfsync packet.
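
    In sketch form (again a userland stand-in, not the kernel code;
    serialize() plays the role of pfsync_out_tdb() and obj_unref() the
    role of tdb_unref()), the send path drains the snapshot and drops
    the queue's reference right after each entry has been copied out:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
        int                     refcnt;
        int                     id;
        TAILQ_ENTRY(obj)        sync_snap;
};
TAILQ_HEAD(objq, obj);

static void
obj_unref(struct obj *o)
{
        if (--o->refcnt == 0)
                free(o);
}

static void
serialize(struct obj *o)
{
        /* stands in for copying the entry into the outgoing packet */
        printf("packed obj %d\n", o->id);
}

static void
dispatch(struct objq *snap)
{
        struct obj *o;

        while ((o = TAILQ_FIRST(snap)) != NULL) {
                TAILQ_REMOVE(snap, o, sync_snap);
                serialize(o);
                obj_unref(o);   /* the packet holds a copy; drop the queue's ref */
        }
}

int
main(void)
{
        struct objq snap = TAILQ_HEAD_INITIALIZER(snap);
        struct obj *o = calloc(1, sizeof(*o));

        o->refcnt = 1;          /* reference taken when the object was queued */
        o->id = 42;
        TAILQ_INSERT_TAIL(&snap, o, sync_snap);
        dispatch(&snap);        /* serializes the entry, then frees it */
        return 0;
}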

</snip>
> > @@ -2525,7 +2565,17 @@ pfsync_delete_tdb(struct tdb *t)
> >
> >     mtx_enter(&sc->sc_tdb_mtx);
> >
> > +   /*
> > +    * if tdb entry is just being processed (found in snapshot),
> > +    * then it can not be deleted. we just came too late
> > +    */
> > +   if (t->tdb_snapped) {
> > +           mtx_leave(&sc->sc_tdb_mtx);
> 
> You must not access the TDB after this point.  I think you cannot
> guarantee that.  The memory will be freed after return.

    The new diff fixes that.

</snip>
> > diff --git a/sys/netinet/ip_ipsp.h b/sys/netinet/ip_ipsp.h
> > index c697994047b..ebdb6ada1ae 100644
> > --- a/sys/netinet/ip_ipsp.h
> > +++ b/sys/netinet/ip_ipsp.h
> > @@ -405,6 +405,7 @@ struct tdb {                            /* tunnel descriptor block */
> >     u_int8_t        tdb_wnd;        /* Replay window */
> >     u_int8_t        tdb_satype;     /* SA type (RFC2367, PF_KEY) */
> >     u_int8_t        tdb_updates;    /* pfsync update counter */
> > +   u_int8_t        tdb_snapped;            /* dispatched by pfsync(4) */
> 
> u_int8_t is not atomic.  If you want to change tdb_snapped, you need
> a mutex that also protects the other fields in the same 32-bit
> word everywhere.  I think a new flag TDBF_PFSYNC_SNAPPED in tdb_flags
> would be easier.  The tdb_flags are protected by tdb_mtx.
> 

    I like your idea.
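
    Something along these lines, I assume (a userland sketch of the
    suggested approach; pthread_mutex stands in for tdb_mtx and the flag
    values are illustrative, not the ones from ip_ipsp.h): the snapped
    bit lives in the flags word and is only read or written with the
    entry's mutex held, so a late deleter can detect it and back off:

#include <pthread.h>
#include <stdio.h>

#define F_PFSYNC                0x01    /* entry is queued for sync */
#define F_PFSYNC_SNAPPED        0x02    /* entry sits on a snapshot queue */

struct entry {
        pthread_mutex_t  mtx;           /* stands in for tdb_mtx */
        unsigned int     flags;         /* protected by mtx */
};

/* the snapshot code marks the entry while holding its mutex */
static void
entry_snap(struct entry *e)
{
        pthread_mutex_lock(&e->mtx);
        e->flags |= F_PFSYNC_SNAPPED;
        pthread_mutex_unlock(&e->mtx);
}

/* a deleter that arrives too late must leave a snapped entry alone */
static int
entry_try_delete(struct entry *e)
{
        int snapped;

        pthread_mutex_lock(&e->mtx);
        snapped = (e->flags & F_PFSYNC_SNAPPED) != 0;
        if (!snapped)
                e->flags &= ~F_PFSYNC;
        pthread_mutex_unlock(&e->mtx);
        return !snapped;
}

int
main(void)
{
        struct entry e = {
                .mtx = PTHREAD_MUTEX_INITIALIZER,
                .flags = F_PFSYNC,
        };

        entry_snap(&e);
        printf("delete %s\n", entry_try_delete(&e) ? "done" : "skipped");
        return 0;
}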

The updated diff is below.


thanks and
regards
sashan

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c
index cb0f3fbdf52..536c3f9cb70 100644
--- a/sys/net/if_pfsync.c
+++ b/sys/net/if_pfsync.c
@@ -181,6 +181,7 @@ void        pfsync_q_del(struct pf_state *);
 
 struct pfsync_upd_req_item {
        TAILQ_ENTRY(pfsync_upd_req_item)        ur_entry;
+       TAILQ_ENTRY(pfsync_upd_req_item)        ur_snap;
        struct pfsync_upd_req                   ur_msg;
 };
 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
@@ -295,7 +296,7 @@ void        pfsync_bulk_update(void *);
 void   pfsync_bulk_fail(void *);
 
 void   pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
-void   pfsync_drop_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
+void   pfsync_drop_snapshot(struct pfsync_snapshot *);
 
 void   pfsync_send_dispatch(void *);
 void   pfsync_send_pkt(struct mbuf *);
@@ -422,8 +423,7 @@ pfsync_clone_destroy(struct ifnet *ifp)
                sc->sc_deferred = 0;
                mtx_leave(&sc->sc_deferrals_mtx);
 
-               while (!TAILQ_EMPTY(&deferrals)) {
-                       pd = TAILQ_FIRST(&deferrals);
+               while ((pd = TAILQ_FIRST(&deferrals)) != NULL) {
                        TAILQ_REMOVE(&deferrals, pd, pd_entry);
                        pfsync_undefer(pd, 0);
                }
@@ -1574,6 +1574,9 @@ void
 pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc)
 {
        int q;
+       struct pf_state *st;
+       struct pfsync_upd_req_item *ur;
+       struct tdb *tdb;
 
        sn->sn_sc = sc;
 
@@ -1583,14 +1586,36 @@ pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc)
 
        for (q = 0; q < PFSYNC_S_COUNT; q++) {
                TAILQ_INIT(&sn->sn_qs[q]);
-               TAILQ_CONCAT(&sn->sn_qs[q], &sc->sc_qs[q], sync_list);
+
+               while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
+#ifdef PFSYNC_DEBUG
+                       KASSERT(st->snapped == 0);
+#endif
+                       TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+                       TAILQ_INSERT_TAIL(&sn->sn_qs[q], st, sync_snap);
+                       st->snapped = 1;
+               }
        }
 
        TAILQ_INIT(&sn->sn_upd_req_list);
-       TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry);
+       while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
+               TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+               TAILQ_INSERT_TAIL(&sn->sn_upd_req_list, ur, ur_snap);
+       }
 
        TAILQ_INIT(&sn->sn_tdb_q);
-       TAILQ_CONCAT(&sn->sn_tdb_q, &sc->sc_tdb_q, tdb_sync_entry);
+       while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
+               TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry);
+               TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap);
+
+               mtx_enter(&tdb->tdb_mtx);
+#ifdef PFSYNC_DEBUG
+               KASSERT(!ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED));
+#endif
+               /* SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); */
+               SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED);
+               mtx_leave(&tdb->tdb_mtx);
+       }
 
        sn->sn_len = sc->sc_len;
        sc->sc_len = PFSYNC_MINPKT;
@@ -1606,41 +1631,44 @@ pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc)
 }
 
 void
-pfsync_drop_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc * sc)
+pfsync_drop_snapshot(struct pfsync_snapshot *sn)
 {
        struct pf_state *st;
        struct pfsync_upd_req_item *ur;
        struct tdb *t;
        int q;
 
-
        for (q = 0; q < PFSYNC_S_COUNT; q++) {
                if (TAILQ_EMPTY(&sn->sn_qs[q]))
                        continue;
 
                while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) {
-                       TAILQ_REMOVE(&sn->sn_qs[q], st, sync_list);
 #ifdef PFSYNC_DEBUG
                        KASSERT(st->sync_state == q);
+                       KASSERT(st->snapped == 1);
 #endif
+                       TAILQ_REMOVE(&sn->sn_qs[q], st, sync_snap);
                        st->sync_state = PFSYNC_S_NONE;
+                       st->snapped = 0;
                        pf_state_unref(st);
                }
        }
 
        while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) {
-               TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_entry);
+               TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_snap);
                pool_put(&sn->sn_sc->sc_pool, ur);
        }
 
-       mtx_enter(&sc->sc_tdb_mtx);
        while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) {
-               TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_entry);
+               TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap);
                mtx_enter(&t->tdb_mtx);
+#ifdef PFSYNC_DEBUG
+               KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
+#endif
+               CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
                CLR(t->tdb_flags, TDBF_PFSYNC);
                mtx_leave(&t->tdb_mtx);
        }
-       mtx_leave(&sc->sc_tdb_mtx);
 }
 
 int
@@ -1667,7 +1695,7 @@ pfsync_drop(struct pfsync_softc *sc)
        struct pfsync_snapshot  sn;
 
        pfsync_grab_snapshot(&sn, sc);
-       pfsync_drop_snapshot(&sn, sc);
+       pfsync_drop_snapshot(&sn);
 }
 
 void
@@ -1759,7 +1787,7 @@ pfsync_sendout(void)
        if (m == NULL) {
                sc->sc_if.if_oerrors++;
                pfsyncstat_inc(pfsyncs_onomem);
-               pfsync_drop_snapshot(&sn, sc);
+               pfsync_drop_snapshot(&sn);
                return;
        }
 
@@ -1769,7 +1797,7 @@ pfsync_sendout(void)
                        m_free(m);
                        sc->sc_if.if_oerrors++;
                        pfsyncstat_inc(pfsyncs_onomem);
-                       pfsync_drop_snapshot(&sn, sc);
+                       pfsync_drop_snapshot(&sn);
                        return;
                }
        }
@@ -1799,7 +1827,7 @@ pfsync_sendout(void)
 
                count = 0;
                while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) {
-                       TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_entry);
+                       TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_snap);
 
                        bcopy(&ur->ur_msg, m->m_data + offset,
                            sizeof(ur->ur_msg));
@@ -1827,18 +1855,21 @@ pfsync_sendout(void)
                subh = (struct pfsync_subheader *)(m->m_data + offset);
                offset += sizeof(*subh);
 
-               mtx_enter(&sc->sc_tdb_mtx);
                count = 0;
                while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
-                       TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry);
+                       TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap);
                        pfsync_out_tdb(t, m->m_data + offset);
                        offset += sizeof(struct pfsync_tdb);
                        mtx_enter(&t->tdb_mtx);
+#ifdef PFSYNC_DEBUG
+                       KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
+#endif
+                       CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
                        CLR(t->tdb_flags, TDBF_PFSYNC);
                        mtx_leave(&t->tdb_mtx);
+                       tdb_unref(t);
                        count++;
                }
-               mtx_leave(&sc->sc_tdb_mtx);
 
                bzero(subh, sizeof(*subh));
                subh->action = PFSYNC_ACT_TDB;
@@ -1856,11 +1887,13 @@ pfsync_sendout(void)
 
                count = 0;
                while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) {
-                       TAILQ_REMOVE(&sn.sn_qs[q], st, sync_list);
+                       TAILQ_REMOVE(&sn.sn_qs[q], st, sync_snap);
 #ifdef PFSYNC_DEBUG
                        KASSERT(st->sync_state == q);
+                       KASSERT(st->snapped == 1);
 #endif
                        st->sync_state = PFSYNC_S_NONE;
+                       st->snapped = 0;
                        pfsync_qs[q].write(st, m->m_data + offset);
                        offset += pfsync_qs[q].len;
 
@@ -2411,8 +2444,9 @@ pfsync_q_ins(struct pf_state *st, int q)
                mtx_enter(&sc->sc_st_mtx);
 
                /*
-                * If two threads are competing to insert the same state, then
-                * there must be just single winner.
+                * There are either two threads trying to update the same
+                * state, or the state is just being processed (it is on
+                * the snapshot queue).
                 */
                if (st->sync_state != PFSYNC_S_NONE) {
                        mtx_leave(&sc->sc_st_mtx);
@@ -2450,6 +2484,15 @@ pfsync_q_del(struct pf_state *st)
 
        mtx_enter(&sc->sc_st_mtx);
        q = st->sync_state;
+       /*
+        * Re-check under the mutex: if the state is snapped already, just
+        * bail out; we came too late, the state is just being
+        * processed/dispatched to the peer.
+        */
+       if ((q == PFSYNC_S_NONE) || (st->snapped)) {
+               mtx_leave(&sc->sc_st_mtx);
+               return;
+       }
        atomic_sub_long(&sc->sc_len, pfsync_qs[q].len);
        TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
        if (TAILQ_EMPTY(&sc->sc_qs[q]))
@@ -2495,6 +2538,7 @@ pfsync_update_tdb(struct tdb *t, int output)
                        }
 
                        TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
+                       tdb_ref(t);
                        SET(t->tdb_flags, TDBF_PFSYNC);
                        mtx_leave(&t->tdb_mtx);
 
@@ -2525,11 +2569,23 @@ pfsync_delete_tdb(struct tdb *t)
 
        mtx_enter(&sc->sc_tdb_mtx);
 
+       /*
+        * If the tdb entry is just being processed (found in a snapshot),
+        * it cannot be deleted; we just came too late.
+        */
+       if (ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)) {
+               mtx_leave(&sc->sc_tdb_mtx);
+               return;
+       }
+
        TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
+
        mtx_enter(&t->tdb_mtx);
        CLR(t->tdb_flags, TDBF_PFSYNC);
        mtx_leave(&t->tdb_mtx);
 
+       tdb_unref(t);
+
        nlen = sizeof(struct pfsync_tdb);
        if (TAILQ_EMPTY(&sc->sc_tdb_q))
                nlen += sizeof(struct pfsync_subheader);
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index bd7ec1d88e7..558618a0f14 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -749,6 +749,7 @@ struct pf_state {
        u_int8_t                 pad[3];
 
        TAILQ_ENTRY(pf_state)    sync_list;
+       TAILQ_ENTRY(pf_state)    sync_snap;
        TAILQ_ENTRY(pf_state)    entry_list;
        SLIST_ENTRY(pf_state)    gc_list;
        RB_ENTRY(pf_state)       entry_id;
@@ -797,6 +798,7 @@ struct pf_state {
        pf_refcnt_t              refcnt;
        u_int16_t                delay;
        u_int8_t                 rt;
+       u_int8_t                 snapped;
 };
 
 /*
diff --git a/sys/netinet/ip_ipsp.h b/sys/netinet/ip_ipsp.h
index c697994047b..fa46a1e1282 100644
--- a/sys/netinet/ip_ipsp.h
+++ b/sys/netinet/ip_ipsp.h
@@ -355,6 +355,7 @@ struct tdb {                                /* tunnel descriptor block */
 #define        TDBF_PFSYNC             0x40000 /* TDB will be synced */
 #define        TDBF_PFSYNC_RPL         0x80000 /* Replay counter should be bumped */
 #define        TDBF_ESN                0x100000 /* 64-bit sequence numbers (ESN) */
+#define        TDBF_PFSYNC_SNAPPED     0x200000 /* entry is being dispatched to peer */
 
 #define TDBF_BITS ("\20" \
        "\1UNIQUE\2TIMER\3BYTES\4ALLOCATIONS" \
@@ -439,6 +440,7 @@ struct tdb {                                /* tunnel descriptor block */
 
        TAILQ_HEAD(tdb_policy_head, ipsec_policy) tdb_policy_head; /* [p] */
        TAILQ_ENTRY(tdb)        tdb_sync_entry;
+       TAILQ_ENTRY(tdb)        tdb_sync_snap;
 };
 
 enum tdb_counters {
