this needs testing by people using sticky-address. if we don't get
this in, sticky-address will be broken in 4.7.

----- Forwarded message from Henning Brauer <henn...@openbsd.org> -----

Date: Thu, 26 Nov 2009 00:10:15 +0100
From: Henning Brauer <henn...@openbsd.org>
Subject: unbreak sticky-address
X-PGP-Key: 3A83DF32
User-Agent: Mutt/1.5.20 (2009-06-14)

so. sticky-address is broken beyond relief. this basically reimplements
it... kinda. we need one source tracking node (src node) per state for
each of:
-limit # of states per src addr (this can be global or per rule)
-nat with sticky-addr (always per rule)
-rdr with sticky-addr (always per rule)

match CONSIDERABLY complicates things here. since we do the address
selection on the fly while traversing the ruleset and the rule is part
of the key in the src nodes RB tree... you get the idea. we dunno the
last matching one yet. good news: we don't really need to.

to not grow the states much (actually, not at all) i chose to hang the
src nodes off the stack in an SLIST. typically one entry, max of 4.

# pfctl -sr
match out on em1 inet all nat-to { 172.16.8.8, 172.16.8.9, 172.16.8.10
} round-robin sticky-address
pass all flags S/SA keep state
# pfctl -vvsS
172.16.7.1 nat-to 172.16.8.9 ( states 10, connections 0, rate 0.0/0s )
   age 00:00:09, 263960 pkts, 231156744 bytes, rule 0

----- End forwarded message -----

Index: sys/net/if_pfsync.c
===================================================================
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.133
diff -u -p -r1.133 if_pfsync.c
--- sys/net/if_pfsync.c 23 Nov 2009 16:03:10 -0000      1.133
+++ sys/net/if_pfsync.c 2 Dec 2009 14:15:29 -0000
@@ -428,7 +428,7 @@ pfsync_state_export(struct pfsync_state 
        sp->log = st->log;
        sp->timeout = st->timeout;
        sp->state_flags = st->state_flags;
-       if (st->src_node)
+       if (!SLIST_EMPTY(&st->src_nodes))
                sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
 
        bcopy(&st->id, &sp->id, sizeof(sp->id));
Index: sys/net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.677
diff -u -p -r1.677 pf.c
--- sys/net/pf.c        26 Nov 2009 14:34:49 -0000      1.677
+++ sys/net/pf.c        2 Dec 2009 14:15:31 -0000
@@ -129,7 +129,7 @@ struct pf_anchor_stackframe {
 
 struct pool             pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
 struct pool             pf_state_pl, pf_state_key_pl, pf_state_item_pl;
-struct pool             pf_altq_pl, pf_rule_item_pl;
+struct pool             pf_altq_pl, pf_rule_item_pl, pf_sn_item_pl;
 
 void                    pf_init_threshold(struct pf_threshold *, u_int32_t,
                            u_int32_t);
@@ -175,7 +175,7 @@ static __inline int  pf_create_state(str
                            u_int16_t, int *, struct pfi_kif *,
                            struct pf_state **, int, u_int16_t, u_int16_t,
                            int, struct pf_rule_slist *,
-                           struct pf_rule_actions *);
+                           struct pf_rule_actions *, struct pf_src_node *[]);
 void                    pf_translate(struct pf_pdesc *, struct pf_addr *,
                            u_int16_t, struct pf_addr *, u_int16_t, u_int16_t,
                            int, struct mbuf *, int);
@@ -316,6 +316,8 @@ pf_src_compare(struct pf_src_node *a, st
                return (1);
        if (a->rule.ptr < b->rule.ptr)
                return (-1);
+       if ((diff = a->type - b->type) != 0)
+               return (diff);
        if ((diff = a->af - b->af) != 0)
                return (diff);
        switch (a->af) {
@@ -404,21 +406,24 @@ pf_check_threshold(struct pf_threshold *
 int
 pf_src_connlimit(struct pf_state **state)
 {
-       int bad = 0;
+       int                      bad = 0;
+       struct pf_src_node      *sn;
+
+       if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
+               return (0);
 
-       (*state)->src_node->conn++;
+       sn->conn++;
        (*state)->src.tcp_est = 1;
-       pf_add_threshold(&(*state)->src_node->conn_rate);
+       pf_add_threshold(&sn->conn_rate);
 
        if ((*state)->rule.ptr->max_src_conn &&
-           (*state)->rule.ptr->max_src_conn <
-           (*state)->src_node->conn) {
+           (*state)->rule.ptr->max_src_conn < sn->conn) {
                pf_status.lcounters[LCNT_SRCCONN]++;
                bad++;
        }
 
        if ((*state)->rule.ptr->max_src_conn_rate.limit &&
-           pf_check_threshold(&(*state)->src_node->conn_rate)) {
+           pf_check_threshold(&sn->conn_rate)) {
                pf_status.lcounters[LCNT_SRCCONNRATE]++;
                bad++;
        }
@@ -433,7 +438,7 @@ pf_src_connlimit(struct pf_state **state
                pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
                if (pf_status.debug >= PF_DEBUG_MISC) {
                        printf("pf_src_connlimit: blocking address ");
-                       pf_print_host(&(*state)->src_node->addr, 0,
+                       pf_print_host(&sn->addr, 0,
                            (*state)->key[PF_SK_WIRE]->af);
                }
 
@@ -443,13 +448,13 @@ pf_src_connlimit(struct pf_state **state
 #ifdef INET
                case AF_INET:
                        p.pfra_net = 32;
-                       p.pfra_ip4addr = (*state)->src_node->addr.v4;
+                       p.pfra_ip4addr = sn->addr.v4;
                        break;
 #endif /* INET */
 #ifdef INET6
                case AF_INET6:
                        p.pfra_net = 128;
-                       p.pfra_ip6addr = (*state)->src_node->addr.v6;
+                       p.pfra_ip6addr = sn->addr.v6;
                        break;
 #endif /* INET6 */
                }
@@ -473,11 +478,9 @@ pf_src_connlimit(struct pf_state **state
                                if (sk->af ==
                                    (*state)->key[PF_SK_WIRE]->af &&
                                    (((*state)->direction == PF_OUT &&
-                                   PF_AEQ(&(*state)->src_node->addr,
-                                       &sk->addr[0], sk->af)) ||
+                                   PF_AEQ(&sn->addr, &sk->addr[0], sk->af)) ||
                                    ((*state)->direction == PF_IN &&
-                                   PF_AEQ(&(*state)->src_node->addr,
-                                       &sk->addr[1], sk->af))) &&
+                                   PF_AEQ(&sn->addr, &sk->addr[1], sk->af))) &&
                                    ((*state)->rule.ptr->flush &
                                    PF_FLUSH_GLOBAL ||
                                    (*state)->rule.ptr == st->rule.ptr)) {
@@ -502,19 +505,19 @@ pf_src_connlimit(struct pf_state **state
 
 int
 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
-    struct pf_addr *src, sa_family_t af)
+    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
+    struct pf_addr *raddr, int global)
 {
        struct pf_src_node      k;
 
        if (*sn == NULL) {
                k.af = af;
+               k.type = type;
                PF_ACPY(&k.addr, src, af);
-               if (rule->rule_flag & PFRULE_RULESRCTRACK ||
-                   rule->nat.opts & PF_POOL_STICKYADDR ||
-                   rule->rdr.opts & PF_POOL_STICKYADDR)
-                       k.rule.ptr = rule;
-               else
+               if (global)
                        k.rule.ptr = NULL;
+               else
+                       k.rule.ptr = rule;
                pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
                *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
        }
@@ -531,14 +534,15 @@ pf_insert_src_node(struct pf_src_node **
                    rule->max_src_conn_rate.limit,
                    rule->max_src_conn_rate.seconds);
 
+               (*sn)->type = type;
                (*sn)->af = af;
-               if (rule->rule_flag & PFRULE_RULESRCTRACK ||
-                   rule->nat.opts & PF_POOL_STICKYADDR ||
-                   rule->rdr.opts & PF_POOL_STICKYADDR)
-                       (*sn)->rule.ptr = rule;
-               else
+               if (global)
                        (*sn)->rule.ptr = NULL;
+               else
+                       (*sn)->rule.ptr = rule;
                PF_ACPY(&(*sn)->addr, src, af);
+               if (raddr)
+                       PF_ACPY(&(*sn)->raddr, raddr, af);
                if (RB_INSERT(pf_src_tree,
                    &tree_src_tracking, *sn) != NULL) {
                        if (pf_status.debug >= PF_DEBUG_MISC) {
@@ -550,7 +554,6 @@ pf_insert_src_node(struct pf_src_node **
                        return (-1);
                }
                (*sn)->creation = time_second;
-               (*sn)->ruletype = rule->action;
                if ((*sn)->rule.ptr != NULL)
                        (*sn)->rule.ptr->src_nodes++;
                pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
@@ -565,6 +568,54 @@ pf_insert_src_node(struct pf_src_node **
        return (0);
 }
 
+/* Unlink and free sn once it has no states and its expiry has passed. */
+void
+pf_remove_src_node(struct pf_src_node *sn)
+{
+       if (sn->states > 0 || sn->expire > time_second)
+               return;         /* still has states or not yet expired */
+       if (sn->rule.ptr != NULL) {     /* only rule-bound nodes touch a rule */
+               sn->rule.ptr->src_nodes--;
+               if (sn->rule.ptr->states_cur <= 0 &&
+                   sn->rule.ptr->max_src_nodes <= 0)
+                       pf_rm_rule(NULL, sn->rule.ptr);
+       }                               /* nodes without a rule are freed too */
+       RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
+       pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+       pf_status.src_nodes--;
+       pool_put(&pf_src_tree_pl, sn);
+}
+
+struct pf_src_node *
+pf_get_src_node(struct pf_state *s, enum pf_sn_types type)
+{
+       struct pf_sn_item       *item = SLIST_FIRST(&s->src_nodes);
+
+       /* advance until an item whose node has the wanted type, or list end */
+       while (item != NULL && item->sn->type != type)
+               item = SLIST_NEXT(item, next);
+       return (item != NULL ? item->sn : NULL);
+}
+
+/* Detach from state s every list item that refers to src node sn. */
+void
+pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
+{
+       struct pf_sn_item       *sni, *snin, *snip = NULL;
+       for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) {
+               snin = SLIST_NEXT(sni, next);   /* fetch before sni is freed */
+               if (sni->sn == sn) {
+                       if (snip)
+                               SLIST_REMOVE_NEXT(&s->src_nodes, snip, next);
+                       else
+                               SLIST_REMOVE_HEAD(&s->src_nodes, next);
+                       pool_put(&pf_sn_item_pl, sni);
+                       sn->states--;
+               } else
+                       snip = sni;     /* only a kept item may be predecessor */
+       }
+}
+
 /* state table stuff */
 
 static __inline int
@@ -1063,16 +1114,7 @@ pf_purge_expired_src_nodes(int waslocked
                                    &tree_src_tracking, cur);
                                locked = 1;
                        }
-                       if (cur->rule.ptr != NULL) {
-                               cur->rule.ptr->src_nodes--;
-                               if (cur->rule.ptr->states_cur <= 0 &&
-                                   cur->rule.ptr->max_src_nodes <= 0)
-                                       pf_rm_rule(NULL, cur->rule.ptr);
-                       }
-                       RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
-                       pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
-                       pf_status.src_nodes--;
-                       pool_put(&pf_src_tree_pl, cur);
+                       pf_remove_src_node(cur);
                }
        }
 
@@ -1083,20 +1125,22 @@ pf_purge_expired_src_nodes(int waslocked
 void
 pf_src_tree_remove_state(struct pf_state *s)
 {
-       u_int32_t timeout;
+       u_int32_t                timeout;
+       struct pf_sn_item       *sni;
 
-       if (s->src_node != NULL) {
+       while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) {
+               SLIST_REMOVE_HEAD(&s->src_nodes, next);
                if (s->src.tcp_est)
-                       --s->src_node->conn;
-               if (--s->src_node->states <= 0) {
+                       --sni->sn->conn;
+               if (--sni->sn->states <= 0) {
                        timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
                        if (!timeout)
                                timeout =
                                    pf_default_rule.timeout[PFTM_SRC_NODE];
-                       s->src_node->expire = time_second + timeout;
+                       sni->sn->expire = time_second + timeout;
                }
+               pool_put(&pf_sn_item_pl, sni);
        }
-       s->src_node = NULL;
 }
 
 /* callers should be at splsoftnet */
@@ -2586,14 +2630,14 @@ pf_set_rt_ifp(struct pf_state *s, struct
 #ifdef INET
        case AF_INET:
                pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn,
-                   &r->route);
+                   &r->route, PF_SN_ROUTE);
                s->rt_kif = r->route.cur->kif;
                break;
 #endif /* INET */
 #ifdef INET6
        case AF_INET6:
                pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn,
-                   &r->route);
+                   &r->route, PF_SN_ROUTE);
                s->rt_kif = r->route.cur->kif;
                break;
 #endif /* INET6 */
@@ -2662,6 +2706,7 @@ pf_test_rule(struct pf_rule **rm, struct
        struct pf_ruleset       *ruleset = NULL;
        struct pf_rule_slist     rules;
        struct pf_rule_item     *ri;
+       struct pf_src_node      *sns[PF_SN_MAX];
        struct tcphdr           *th = pd->hdr.tcp;
        struct pf_state_key     *skw = NULL, *sks = NULL;
        struct pf_rule_actions   act;
@@ -2679,6 +2724,7 @@ pf_test_rule(struct pf_rule **rm, struct
        PF_ACPY(&daddr, pd->dst, pd->af);
 
        bzero(&act, sizeof(act));
+       bzero(sns, sizeof(sns));
        act.rtableid = -1;
        SLIST_INIT(&rules);
 
@@ -2819,7 +2865,7 @@ pf_test_rule(struct pf_rule **rm, struct
                                        SLIST_INSERT_HEAD(&rules, ri, entry);
                                        pf_rule_to_actions(r, &act);
                                        pf_get_transaddr(r, pd, &saddr, &sport,
-                                           &daddr, &dport);
+                                           &daddr, &dport, sns);
                                } else {
                                        match = 1;
                                        *rm = r;
@@ -2845,7 +2891,8 @@ pf_test_rule(struct pf_rule **rm, struct
        /* apply actions for last matching rule */
        if (lastr && lastr->action != PF_MATCH) {
                pf_rule_to_actions(lastr, &act);
-               pf_get_transaddr(lastr, pd, &saddr, &sport, &daddr, &dport);
+               pf_get_transaddr(lastr, pd, &saddr, &sport, &daddr, &dport,
+                   sns);
        }
 
        REASON_SET(&reason, PFRES_MATCH);
@@ -2853,6 +2900,7 @@ pf_test_rule(struct pf_rule **rm, struct
        if (act.log) {
                struct pf_rule_item *mr;
 
+               /* XXX this is BEFORE nat/rdr are actually applied! */ 
                if (r->log)
                        PFLOG_PACKET(kif, h, m, af, direction, reason,
                            r, a, ruleset, pd);
@@ -2922,9 +2970,17 @@ pf_test_rule(struct pf_rule **rm, struct
 
        if (!state_icmp && r->keep_state) {
                int action;
+
+               if (r->rule_flag & PFRULE_SRCTRACK &&
+                   pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE, pd->af,
+                   pd->src, NULL, 0) != 0) {
+                       REASON_SET(&reason, PFRES_SRCLIMIT);
+                       goto cleanup;
+               }
+
                action = pf_create_state(r, a, pd, &skw, &sks, m,
                    off, &saddr, sport, &daddr, dport, &rewrite, kif, sm, tag,
-                   bproto_sum, bip_sum, hdrlen, &rules, &act);
+                   bproto_sum, bip_sum, hdrlen, &rules, &act, sns);
 
                if (action != PF_PASS)
                        return (action);
@@ -2983,13 +3039,14 @@ pf_create_state(struct pf_rule *r, struc
     int off, struct pf_addr *saddr, u_int16_t sport, struct pf_addr *daddr,
     u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm,
     int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen,
-    struct pf_rule_slist *rules, struct pf_rule_actions *act)
+    struct pf_rule_slist *rules, struct pf_rule_actions *act,
+    struct pf_src_node *sns[PF_SN_MAX])
 {
        struct pf_state         *s = NULL;
-       struct pf_src_node      *sn = NULL;
        struct tcphdr           *th = pd->hdr.tcp;
        u_int16_t                mss = tcp_mssdflt;
        u_short                  reason;
+       u_int                    i;
 
        /* check maximums */
        if (r->max_states && (r->states_cur >= r->max_states)) {
@@ -2997,14 +3054,7 @@ pf_create_state(struct pf_rule *r, struc
                REASON_SET(&reason, PFRES_MAXSTATES);
                return (PF_DROP);
        }
-       /* src node for filter rule */
-       if ((r->rule_flag & PFRULE_SRCTRACK ||
-           r->rdr.opts & PF_POOL_STICKYADDR ||
-           r->nat.opts & PF_POOL_STICKYADDR) &&
-           pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
-               REASON_SET(&reason, PFRES_SRCLIMIT);
-               goto csfailed;
-       }
+
        s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
        if (s == NULL) {
                REASON_SET(&reason, PFRES_MEMORY);
@@ -3086,10 +3136,7 @@ pf_create_state(struct pf_rule *r, struc
        s->creation = time_second;
        s->expire = time_second;
 
-       if (sn != NULL) {
-               s->src_node = sn;
-               s->src_node->states++;
-       }
+       /* XXX on error all these should goto csfailed after extra cleanup */
        if (pd->proto == IPPROTO_TCP) {
                if (s->state_flags & PFSTATE_SCRUB_TCP &&
                    pf_normalize_tcp_init(m, off, pd, th, &s->src, &s->dst)) {
@@ -3116,7 +3163,7 @@ pf_create_state(struct pf_rule *r, struc
 
        if (pf_state_key_setup(pd, skw, sks, &saddr, &daddr, &sport, &dport,
            act->rtableid))
-               goto csfailed;
+               goto csfailed;  /* XXX leaks */
 
        if (pf_state_insert(BOUND_IFACE(r, kif), *skw, *sks, s)) {
                if (pd->proto == IPPROTO_TCP)
@@ -3129,6 +3176,24 @@ pf_create_state(struct pf_rule *r, struc
        } else
                *sm = s;
 
+       /* attach src nodes late, otherwise cleanup on error nontrivial */
+       for (i = 0; i < PF_SN_MAX; i++)
+               if (sns[i] != NULL) {
+                       struct pf_sn_item       *sni;
+
+                       sni = pool_get(&pf_sn_item_pl, PR_NOWAIT);
+                       if (sni == NULL) {
+                               REASON_SET(&reason, PFRES_MEMORY);
+                               pf_src_tree_remove_state(s);
+                               STATE_DEC_COUNTERS(s);
+                               pool_put(&pf_state_pl, s);
+                               return (PF_DROP);
+                       }
+                       sni->sn = sns[i];
+                       SLIST_INSERT_HEAD(&s->src_nodes, sni, next);
+                       sni->sn->states++;
+               }
+
        pf_set_rt_ifp(s, pd->src);      /* needs s->state_key set */
        if (tag > 0) {
                pf_tag_ref(tag);
@@ -3154,6 +3219,7 @@ pf_create_state(struct pf_rule *r, struc
        return (PF_PASS);
 
 csfailed:
+       /* skw/sks checks obsolete */
        if (*skw != NULL) {
                pool_put(&pf_state_key_pl, *skw);
                *skw = NULL;
@@ -3163,12 +3229,10 @@ csfailed:
                *sks = NULL;
        }
 
-       if (sn != NULL && sn->states == 0 && sn->expire == 0) {
-               RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
-               pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
-               pf_status.src_nodes--;
-               pool_put(&pf_src_tree_pl, sn);
-       }
+       for (i = 0; i < PF_SN_MAX; i++)
+               if (sns[i] != NULL)
+                       pf_remove_src_node(sns[i]);
+
        return (PF_DROP);
 }
 
@@ -3533,7 +3597,7 @@ pf_tcp_track_full(struct pf_state_peer *
                        if (dst->state == TCPS_SYN_SENT) {
                                dst->state = TCPS_ESTABLISHED;
                                if (src->state == TCPS_ESTABLISHED &&
-                                   (*state)->src_node != NULL &&
+                                   !SLIST_EMPTY(&(*state)->src_nodes) &&
                                    pf_src_connlimit(state)) {
                                        REASON_SET(reason, PFRES_SRCLIMIT);
                                        return (PF_DROP);
@@ -3689,7 +3753,7 @@ pf_tcp_track_sloppy(struct pf_state_peer
                if (dst->state == TCPS_SYN_SENT) {
                        dst->state = TCPS_ESTABLISHED;
                        if (src->state == TCPS_ESTABLISHED &&
-                           (*state)->src_node != NULL &&
+                           !SLIST_EMPTY(&(*state)->src_nodes) &&
                            pf_src_connlimit(state)) {
                                REASON_SET(reason, PFRES_SRCLIMIT);
                                return (PF_DROP);
@@ -3705,7 +3769,7 @@ pf_tcp_track_sloppy(struct pf_state_peer
                         * the destination, set the connection to established.
                         */
                        dst->state = src->state = TCPS_ESTABLISHED;
-                       if ((*state)->src_node != NULL &&
+                       if (!SLIST_EMPTY(&(*state)->src_nodes) &&
                            pf_src_connlimit(state)) {
                                REASON_SET(reason, PFRES_SRCLIMIT);
                                return (PF_DROP);
@@ -3804,7 +3868,7 @@ pf_test_state_tcp(struct pf_state **stat
                    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
                        REASON_SET(reason, PFRES_SYNPROXY);
                        return (PF_DROP);
-               } else if ((*state)->src_node != NULL &&
+               } else if (!SLIST_EMPTY(&(*state)->src_nodes) &&
                    pf_src_connlimit(state)) {
                        REASON_SET(reason, PFRES_SRCLIMIT);
                        return (PF_DROP);
@@ -5021,7 +5085,7 @@ pf_route(struct mbuf **m, struct pf_rule
                }
                if (s == NULL) {
                        pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
-                           &naddr, NULL, &sn, &r->route);
+                           &naddr, NULL, &sn, &r->route, PF_SN_ROUTE);
                        if (!PF_AZERO(&naddr, AF_INET))
                                dst->sin_addr.s_addr = naddr.v4.s_addr;
                        ifp = r->route.cur->kif ?
@@ -5203,7 +5267,7 @@ pf_route6(struct mbuf **m, struct pf_rul
        }
        if (s == NULL) {
                pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
-                   &naddr, NULL, &sn, &r->route);
+                   &naddr, NULL, &sn, &r->route, PF_SN_ROUTE);
                if (!PF_AZERO(&naddr, AF_INET6))
                        PF_ACPY((struct pf_addr *)&dst->sin6_addr,
                            &naddr, AF_INET6);
@@ -5685,10 +5749,11 @@ done:
                }
                if (s != NULL) {
                        struct pf_rule_item     *ri;
+                       struct pf_sn_item       *sni;
 
-                       if (s->src_node != NULL) {
-                               s->src_node->packets[dirndx]++;
-                               s->src_node->bytes[dirndx] += pd.tot_len;
+                       SLIST_FOREACH(sni, &s->src_nodes, next) {
+                               sni->sn->packets[dirndx]++;
+                               sni->sn->bytes[dirndx] += pd.tot_len;
                        }
                        dirndx = (dir == s->direction) ? 0 : 1;
                        s->packets[dirndx]++;
@@ -6126,9 +6191,11 @@ done:
                        a->bytes[dirndx] += pd.tot_len;
                }
                if (s != NULL) {
-                       if (s->src_node != NULL) {
-                               s->src_node->packets[dirndx]++;
-                               s->src_node->bytes[dirndx] += pd.tot_len;
+                       struct pf_sn_item       *sni;
+
+                       SLIST_FOREACH(sni, &s->src_nodes, next) {
+                               sni->sn->packets[dirndx]++;
+                               sni->sn->bytes[dirndx] += pd.tot_len;
                        }
                        dirndx = (dir == s->direction) ? 0 : 1;
                        s->packets[dirndx]++;
Index: sys/net/pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.228
diff -u -p -r1.228 pf_ioctl.c
--- sys/net/pf_ioctl.c  24 Nov 2009 13:23:55 -0000      1.228
+++ sys/net/pf_ioctl.c  2 Dec 2009 14:15:32 -0000
@@ -159,6 +159,8 @@ pfattach(int num)
            &pool_allocator_nointr);
        pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0, 0, 0,
            "pfsrctrpl", NULL);
+       pool_init(&pf_sn_item_pl, sizeof(struct pf_sn_item), 0, 0, 0,
+           "pfsnitempl", NULL);
        pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl",
            NULL);
        pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, 0, 0,
@@ -2793,13 +2795,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
                struct pf_state         *state;
 
                RB_FOREACH(state, pf_state_tree_id, &tree_id)
-                       state->src_node = NULL;
-               RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+                       pf_src_tree_remove_state(state);
+               RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
                        n->expire = 1;
-                       n->states = 0;
-               }
                pf_purge_expired_src_nodes(1);
-               pf_status.src_nodes = 0;
                break;
        }
 
@@ -2820,13 +2819,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
                                &psnk->psnk_dst.addr.v.a.mask,
                                &sn->raddr, sn->af)) {
                                /* Handle state to src_node linkage */
-                               if (sn->states != 0) {
+                               if (sn->states != 0)
                                        RB_FOREACH(s, pf_state_tree_id,
-                                           &tree_id)
-                                               if (s->src_node == sn)
-                                                       s->src_node = NULL;
-                                       sn->states = 0;
-                               }
+                                          &tree_id)
+                                               pf_state_rm_src_node(s, sn);
                                sn->expire = 1;
                                killed++;
                        }
Index: sys/net/pf_lb.c
===================================================================
RCS file: /cvs/src/sys/net/pf_lb.c,v
retrieving revision 1.8
diff -u -p -r1.8 pf_lb.c
--- sys/net/pf_lb.c     3 Nov 2009 10:59:04 -0000       1.8
+++ sys/net/pf_lb.c     2 Dec 2009 14:15:32 -0000
@@ -174,7 +174,8 @@ pf_get_sport(sa_family_t af, u_int8_t pr
        u_int16_t               cut;
 
        bzero(&init_addr, sizeof(init_addr));
-       if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat))
+       if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat,
+           PF_SN_NAT))
                return (1);
 
        if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
@@ -247,7 +248,7 @@ pf_get_sport(sa_family_t af, u_int8_t pr
                case PF_POOL_RANDOM:
                case PF_POOL_ROUNDROBIN:
                        if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn,
-                           &r->nat))
+                           &r->nat, PF_SN_NAT))
                                return (1);
                        break;
                case PF_POOL_NONE:
@@ -262,8 +263,8 @@ pf_get_sport(sa_family_t af, u_int8_t pr
 
 int
 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
-    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn,
-    struct pf_pool *rpool)
+    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
+    struct pf_pool *rpool, enum pf_sn_types type)
 {
        unsigned char            hash[16];
        struct pf_addr          *raddr = &rpool->cur->addr.v.a.addr;
@@ -271,21 +272,20 @@ pf_map_addr(sa_family_t af, struct pf_ru
        struct pf_pooladdr      *acur = rpool->cur;
        struct pf_src_node       k;
 
-       if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR &&
+       if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
            (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
                k.af = af;
+               k.type = type;
                PF_ACPY(&k.addr, saddr, af);
-               if (r->rule_flag & PFRULE_RULESRCTRACK ||
-                   rpool->opts & PF_POOL_STICKYADDR)
-                       k.rule.ptr = r;
-               else
-                       k.rule.ptr = NULL;
+               k.rule.ptr = r;
                pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
-               *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
-               if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
-                       PF_ACPY(naddr, &(*sn)->raddr, af);
+               sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
+               if (sns[type] != NULL) {
+                       if (!PF_AZERO(&(sns[type])->raddr, af))
+                               PF_ACPY(naddr, &(sns[type])->raddr, af);
                        if (pf_status.debug >= PF_DEBUG_MISC) {
-                               printf("pf_map_addr: src tracking maps ");
+                               printf("pf_map_addr: src tracking (%u) maps ",
+                                   type);
                                pf_print_host(&k.addr, 0, af);
                                printf(" to ");
                                pf_print_host(naddr, 0, af);
@@ -428,8 +434,16 @@ pf_map_addr(sa_family_t af, struct pf_ru
                PF_AINC(&rpool->counter, af);
                break;
        }
-       if (*sn != NULL)
-               PF_ACPY(&(*sn)->raddr, naddr, af);
+
+       if (rpool->opts & PF_POOL_STICKYADDR) {
+               if (sns[type] != NULL) {
+                       pf_remove_src_node(sns[type]);
+                       sns[type] = NULL;
+               }
+               if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
+                   0))
+                       return (1);
+       }
 
        if (pf_status.debug >= PF_DEBUG_NOISY &&
            (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
@@ -443,19 +457,18 @@ pf_map_addr(sa_family_t af, struct pf_ru
 
 int
 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, struct pf_addr *saddr,
-    u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport)
+    u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport,
+    struct pf_src_node **sns)
 {
        struct pf_addr  naddr;
        u_int16_t       nport = 0;
 
-       struct pf_src_node srcnode, *sn = &srcnode;
-
        if (!TAILQ_EMPTY(&r->nat.list)) {
                /* XXX is this right? what if rtable is changed at the same
                 * XXX time? where do I need to figure out the sport? */
                if (pf_get_sport(pd->af, pd->proto, r, saddr,
                    daddr, *dport, &naddr, &nport, r->nat.proxy_port[0],
-                   r->nat.proxy_port[1], &sn, pd->rdomain)) {
+                   r->nat.proxy_port[1], sns, pd->rdomain)) {
                        DPFPRINTF(PF_DEBUG_MISC,
                            ("pf: NAT proxy port allocation "
                            "(%u-%u) failed\n",
@@ -468,7 +481,8 @@ pf_get_transaddr(struct pf_rule *r, stru
                        *sport = nport;
        }
        if (!TAILQ_EMPTY(&r->rdr.list)) {
-               if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, &sn, &r->rdr))
+               if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, sns, &r->rdr,
+                   PF_SN_RDR))
                        return (-1);
                if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
                        PF_POOLMASK(&naddr, &naddr,  &r->rdr.cur->addr.v.a.mask,
@@ -497,4 +511,3 @@ pf_get_transaddr(struct pf_rule *r, stru
 
        return (0);
 }
-
Index: sys/net/pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.301
diff -u -p -r1.301 pfvar.h
--- sys/net/pfvar.h     24 Nov 2009 13:23:55 -0000      1.301
+++ sys/net/pfvar.h     2 Dec 2009 14:15:32 -0000
@@ -658,23 +658,32 @@ struct pf_rule_item {
 
 SLIST_HEAD(pf_rule_slist, pf_rule_item);
 
+enum pf_sn_types { PF_SN_NONE, PF_SN_NAT, PF_SN_RDR, PF_SN_ROUTE, PF_SN_MAX };
+
 struct pf_src_node {
-       RB_ENTRY(pf_src_node) entry;
-       struct pf_addr   addr;
-       struct pf_addr   raddr;
-       union pf_rule_ptr rule;
-       struct pfi_kif  *kif;
-       u_int64_t        bytes[2];
-       u_int64_t        packets[2];
-       u_int32_t        states;
-       u_int32_t        conn;
-       struct pf_threshold     conn_rate;
-       u_int32_t        creation;
-       u_int32_t        expire;
-       sa_family_t      af;
-       u_int8_t         ruletype;
+       RB_ENTRY(pf_src_node)    entry;
+       struct pf_addr           addr;
+       struct pf_addr           raddr;
+       union pf_rule_ptr        rule;
+       struct pfi_kif          *kif;
+       u_int64_t                bytes[2];
+       u_int64_t                packets[2];
+       u_int32_t                states;
+       u_int32_t                conn;
+       struct pf_threshold      conn_rate;
+       u_int32_t                creation;
+       u_int32_t                expire;
+       sa_family_t              af;
+       u_int8_t                 type;
+};
+
+struct pf_sn_item {
+       SLIST_ENTRY(pf_sn_item)  next;
+       struct pf_src_node      *sn;
 };
 
+SLIST_HEAD(pf_sn_head, pf_sn_item);
+
 #define PFSNODE_HIWAT          10000   /* default source node table size */
 
 struct pf_state_scrub {
@@ -766,10 +775,10 @@ struct pf_state {
        union pf_rule_ptr        rule;
        union pf_rule_ptr        anchor;
        struct pf_addr           rt_addr;
+       struct pf_sn_head        src_nodes;
        struct pf_state_key     *key[2];        /* addresses stack and wire  */
        struct pfi_kif          *kif;
        struct pfi_kif          *rt_kif;
-       struct pf_src_node      *src_node;
        u_int64_t                packets[2];
        u_int64_t                bytes[2];
        u_int32_t                creation;
@@ -1649,7 +1658,7 @@ extern int                         pf_tbladdr_setup(struct pf
 extern void                     pf_tbladdr_remove(struct pf_addr_wrap *);
 extern void                     pf_tbladdr_copyout(struct pf_addr_wrap *);
 extern void                     pf_calc_skip_steps(struct pf_rulequeue *);
-extern struct pool              pf_src_tree_pl, pf_rule_pl;
+extern struct pool              pf_src_tree_pl, pf_sn_item_pl, pf_rule_pl;
 extern struct pool              pf_state_pl, pf_state_key_pl, pf_state_item_pl,
                                    pf_altq_pl, pf_pooladdr_pl, pf_rule_item_pl;
 extern struct pool              pf_state_scrub_pl;
@@ -1662,10 +1671,17 @@ extern int                       pf_state_insert(struct pfi
                                    struct pf_state_key *,
                                    struct pf_state_key *,
                                    struct pf_state *);
-extern int                      pf_insert_src_node(struct pf_src_node **,
-                                   struct pf_rule *, struct pf_addr *,
-                                   sa_family_t);
+int                             pf_insert_src_node(struct pf_src_node **,
+                                   struct pf_rule *, enum pf_sn_types,
+                                   sa_family_t, struct pf_addr *,
+                                   struct pf_addr *, int);
+void                            pf_remove_src_node(struct pf_src_node *);
+struct pf_src_node             *pf_get_src_node(struct pf_state *,
+                                   enum pf_sn_types);
 void                            pf_src_tree_remove_state(struct pf_state *);
+void                            pf_state_rm_src_node(struct pf_state *,
+                                   struct pf_src_node *);
+
 extern struct pf_state         *pf_find_state_byid(struct pf_state_cmp *);
 extern struct pf_state         *pf_find_state_all(struct pf_state_key_cmp *,
                                    u_int, int *);
@@ -1870,17 +1886,18 @@ int                      pf_step_out_of_anchor(int *, stru
 
 int                     pf_get_transaddr(struct pf_rule *, struct pf_pdesc *,
                            struct pf_addr *, u_int16_t *, struct pf_addr *,
-                           u_int16_t *);
+                           u_int16_t *, struct pf_src_node **);
 
 int                     pf_map_addr(sa_family_t, struct pf_rule *,
                            struct pf_addr *, struct pf_addr *,
                            struct pf_addr *, struct pf_src_node **,
-                           struct pf_pool *);
+                           struct pf_pool *, enum pf_sn_types);
 
 int                     pf_state_key_setup(struct pf_pdesc *,
                            struct pf_state_key **, struct pf_state_key **,
                            struct pf_addr **, struct pf_addr **,
                            u_int16_t *, u_int16_t *, int);
+
 #endif /* _KERNEL */
 
 
Index: sbin/pfctl/pfctl_parser.c
===================================================================
RCS file: /cvs/src/sbin/pfctl/pfctl_parser.c,v
retrieving revision 1.252
diff -u -p -r1.252 pfctl_parser.c
--- sbin/pfctl/pfctl_parser.c   23 Nov 2009 21:29:21 -0000      1.252
+++ sbin/pfctl/pfctl_parser.c   2 Dec 2009 14:15:33 -0000
@@ -619,9 +619,20 @@ print_src_node(struct pf_src_node *sn, i
 
        aw.v.a.addr = sn->addr;
        print_addr(&aw, sn->af, opts & PF_OPT_VERBOSE2);
-       printf(" -> ");
-       aw.v.a.addr = sn->raddr;
-       print_addr(&aw, sn->af, opts & PF_OPT_VERBOSE2);
+
+       if (!PF_AZERO(&sn->raddr, sn->af)) {
+               if (sn->type == PF_SN_NAT)
+                       printf(" nat-to ");
+               else if (sn->type == PF_SN_RDR)
+                       printf(" rdr-to ");
+               else if (sn->type == PF_SN_ROUTE)
+                       printf(" route-to ");
+               else
+                       printf(" ??? (%u) ", sn->type);
+               aw.v.a.addr = sn->raddr;
+               print_addr(&aw, sn->af, opts & PF_OPT_VERBOSE2);
+       }
+
        printf(" ( states %u, connections %u, rate %u.%u/%us )\n", sn->states,
            sn->conn, sn->conn_rate.count / 1000,
            (sn->conn_rate.count % 1000) / 100, sn->conn_rate.seconds);
@@ -642,13 +653,8 @@ print_src_node(struct pf_src_node *sn, i
                printf(", %llu pkts, %llu bytes",
                    sn->packets[0] + sn->packets[1],
                    sn->bytes[0] + sn->bytes[1]);
-               switch (sn->ruletype) {
-               case PF_PASS:
-               case PF_MATCH:
-                       if (sn->rule.nr != -1)
-                               printf(", filter rule %u", sn->rule.nr);
-                       break;
-               }
+               if (sn->rule.nr != -1)
+                       printf(", rule %u", sn->rule.nr);
                printf("\n");
        }
 }

-- 
Henning Brauer, h...@bsws.de, henn...@openbsd.org
BS Web Services, http://bsws.de
Full-Service ISP - Secure Hosting, Mail and DNS Services
Dedicated Servers, Rootservers, Application Hosting

Reply via email to