Hello, diff below reshuffles pfr_create_ktable() so all memory allocations happens outside of NET_LOCK()/PF_LOCK(). pf(4) currently allocates for new table with PR_WAITOK flag, when function is invoked on behalf of ioctl (PFR_FLAG_USERIOCTL flag is set). PR_WAITOK for memory allocation is OK if we don't hold NET_LOCK() of PF_LOCK() while waiting for memory. Unfortunately code in current is not OK in this respect. Diff below fixes that.
The change pushes locks acquisition from pf_ioctl.c further down to pfr_create_ktable(). The proposed change splits current giant loop in pfr_create_ktable() into three loops: the first loop allocates auxiliary list of tables we copied in. the loop also checks for duplicity in input. we grab NET_LOCK()/PF_LOCK() before entering second loop. second loop moves tables instances from auxiliary list into addq, when table does not exist already. If table does exist it is left in auxq and we put existing table we've just found into changeq. If no PFR_FLAG_DUMMY is set the code will execute the third loop, which processes addq we've constructed earlier in second loop. The most complicated part is the third loop, which processes addq. The third loop must handle those situations: - table which is being inserted is attached to main ruleset, in this case we must move '->pfrkt_root' table we've pre-allocated in the first to auxq - the root table ('->pfrkt_root') for newly introduced table exists already. In that case use existing one and move pre-allocated table to auxq - if root table does not exist yet, then initialize it and insert it into addq. OK ? thanks and regards sashan --------8<---------------8<---------------8<------------------8<-------- diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c index 8315b115474..1f036e1368f 100644 --- a/sys/net/pf_ioctl.c +++ b/sys/net/pf_ioctl.c @@ -2217,12 +2217,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENODEV; goto fail; } - NET_LOCK(); - PF_LOCK(); error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); - PF_UNLOCK(); - NET_UNLOCK(); break; } diff --git a/sys/net/pf_table.c b/sys/net/pf_table.c index fb23bcabe04..79fd3e0447b 100644 --- a/sys/net/pf_table.c +++ b/sys/net/pf_table.c @@ -189,6 +189,7 @@ void pfr_clstats_ktable(struct pfr_ktable *, time_t, int); struct pfr_ktable *pfr_create_ktable(struct pfr_table *, time_t, int, int); void pfr_destroy_ktables(struct pfr_ktableworkq *, int); +void pfr_destroy_ktables_aux(struct pfr_ktableworkq *); void pfr_destroy_ktable(struct pfr_ktable *, int); int pfr_ktable_compare(struct pfr_ktable *, struct pfr_ktable *); @@ -1493,14 +1494,16 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) int pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) { - struct pfr_ktableworkq addq, changeq; - struct pfr_ktable *p, *q, *r, key; + struct pfr_ktableworkq addq, changeq, auxq; + struct pfr_ktable *p, *q, *r, *n, *w, key; int i, rv, xadd = 0; time_t tzero = gettime(); ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); SLIST_INIT(&addq); SLIST_INIT(&changeq); + SLIST_INIT(&auxq); + /* pre-allocate all memory outside of locks */ for (i = 0; i < size; i++) { YIELD(flags & PFR_FLAG_USERIOCTL); if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) @@ -1509,65 +1512,142 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) flags & PFR_FLAG_USERIOCTL)) senderr(EINVAL); key.pfrkt_flags |= PFR_TFLAG_ACTIVE; - p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + p = pfr_create_ktable(&key.pfrkt_t, tzero, 0, + !(flags & PFR_FLAG_USERIOCTL)); + if (p == NULL) + senderr(ENOMEM); + + /* + * Note: we also pre-allocate a root table here. We keep it + * at ->pfrkt_root, which we must not forget about. + */ + key.pfrkt_flags = 0; + memset(key.pfrkt_anchor, 0, sizeof(key.pfrkt_anchor)); + p->pfrkt_root = pfr_create_ktable(&key.pfrkt_t, 0, 0, + !(flags & PFR_FLAG_USERIOCTL)); + if (p->pfrkt_root == NULL) { + pfr_destroy_ktable(p, 0); + senderr(ENOMEM); + } + + SLIST_FOREACH(q, &auxq, pfrkt_workq) { + if (!pfr_ktable_compare(p, q)) { + /* + * We need no lock here, because `p` is empty, + * there are no rules or shadow tables + * attached. + */ + pfr_destroy_ktable(p->pfrkt_root, 0); + p->pfrkt_root = NULL; + pfr_destroy_ktable(p, 0); + break; + } + } + + SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq); + } + + /* + * auxq contains freshly allocated tables with no dups. + * also note there are no rulesets attached, because + * the attach operation requires PF_LOCK(). + */ + NET_LOCK(); + PF_LOCK(); + SLIST_FOREACH_SAFE(n, &auxq, pfrkt_workq, w) { + p = RB_FIND(pfr_ktablehead, &pfr_ktables, n); if (p == NULL) { - p = pfr_create_ktable(&key.pfrkt_t, tzero, 1, - !(flags & PFR_FLAG_USERIOCTL)); - if (p == NULL) - senderr(ENOMEM); - SLIST_FOREACH(q, &addq, pfrkt_workq) { - if (!pfr_ktable_compare(p, q)) { - pfr_destroy_ktable(p, 0); - goto _skip; - } + SLIST_REMOVE(&auxq, n, pfr_ktable, pfrkt_workq); + SLIST_INSERT_HEAD(&addq, n, pfrkt_workq); + } else if (!(flags & PFR_FLAG_DUMMY)) { + p->pfrkt_nflags = (p->pfrkt_flags & + ~PFR_TFLAG_USRMASK) | key.pfrkt_flags; + SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq); + } + xadd++; + } + + if (!(flags & PFR_FLAG_DUMMY)) { + /* + * addq contains tables we have to insert and attach rules to + * them + * + * changeq contains tables we need to update + * + * auxq contains pre-allocated tables, we won't use and we must + * free them + */ + SLIST_FOREACH_SAFE(p, &addq, pfrkt_workq, w) { + p->pfrkt_rs = pf_find_or_create_ruleset( + p->pfrkt_anchor); + if (p->pfrkt_rs == NULL) { + xadd--; + SLIST_REMOVE(&addq, p, pfr_ktable, pfrkt_workq); + SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq); + continue; } - SLIST_INSERT_HEAD(&addq, p, pfrkt_workq); - xadd++; - if (!key.pfrkt_anchor[0]) - goto _skip; + p->pfrkt_rs->tables++; - /* find or create root table */ - bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor)); - r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (!p->pfrkt_anchor[0]) { + q = p->pfrkt_root; + p->pfrkt_root = NULL; + SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq); + continue; + } + + /* use pre-allocated root table as a key */ + q = p->pfrkt_root; + p->pfrkt_root = NULL; + r = RB_FIND(pfr_ktablehead, &pfr_ktables, q); if (r != NULL) { p->pfrkt_root = r; - goto _skip; + SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq); + continue; } - SLIST_FOREACH(q, &addq, pfrkt_workq) { - if (!pfr_ktable_compare(&key, q)) { - p->pfrkt_root = q; - goto _skip; + /* + * there is a chance we could create root table in + * earlier iteration. such table may exist in addq only + * then. + */ + SLIST_FOREACH(r, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(r, q)) { + /* + * `r` is our root table we've found + * earlier, `q` can get dropped. + */ + p->pfrkt_root = r; + SLIST_INSERT_HEAD(&auxq, q, + pfrkt_workq); + continue; } } - key.pfrkt_flags = 0; - r = pfr_create_ktable(&key.pfrkt_t, 0, 1, - !(flags & PFR_FLAG_USERIOCTL)); - if (r == NULL) - senderr(ENOMEM); - SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); - p->pfrkt_root = r; - } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { - SLIST_FOREACH(q, &changeq, pfrkt_workq) - if (!pfr_ktable_compare(&key, q)) - goto _skip; - p->pfrkt_nflags = (p->pfrkt_flags & - ~PFR_TFLAG_USRMASK) | key.pfrkt_flags; - SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq); - xadd++; + q->pfrkt_rs = pf_find_or_create_ruleset( + q->pfrkt_root->pfrkt_anchor); + /* + * root tables are attached to main ruleset, + * because ->pfrkt_anchor[0] == '\0' + */ + KASSERT(q->pfrkt_rs == &pf_main_ruleset); + q->pfrkt_rs->tables++; + p->pfrkt_root = q; + SLIST_INSERT_HEAD(&addq, q, pfrkt_workq); } -_skip: - ; - } - if (!(flags & PFR_FLAG_DUMMY)) { + pfr_insert_ktables(&addq); pfr_setflags_ktables(&changeq); - } else - pfr_destroy_ktables(&addq, 0); + } + PF_UNLOCK(); + NET_UNLOCK(); + + pfr_destroy_ktables_aux(&auxq); + if (flags & PFR_FLAG_DUMMY) + pfr_destroy_ktables_aux(&addq); + if (nadd != NULL) *nadd = xadd; return (0); _bad: - pfr_destroy_ktables(&addq, 0); + pfr_destroy_ktables_aux(&auxq); return (rv); } @@ -2214,6 +2294,7 @@ pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset, kt->pfrkt_t = *tbl; if (attachruleset) { + PF_ASSERT_LOCKED(); rs = pf_find_or_create_ruleset(tbl->pfrt_anchor); if (!rs) { pfr_destroy_ktable(kt, 0); @@ -2249,6 +2330,31 @@ pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) } } +void +pfr_destroy_ktables_aux(struct pfr_ktableworkq *auxq) +{ + struct pfr_ktable *p; + + while ((p = SLIST_FIRST(auxq)) != NULL) { + SLIST_REMOVE_HEAD(auxq, pfrkt_workq); + /* + * There must be no extra data (rules, shadow tables, ...) + * attached, because auxq holds just empty memory to be + * initialized. Therefore we can also be called with no lock. + */ + if (p->pfrkt_root != NULL) { + KASSERT(p->pfrkt_root->pfrkt_rs == NULL); + KASSERT(p->pfrkt_root->pfrkt_shadow == NULL); + KASSERT(p->pfrkt_root->pfrkt_root == NULL); + pfr_destroy_ktable(p->pfrkt_root, 0); + p->pfrkt_root = NULL; + } + KASSERT(p->pfrkt_rs == NULL); + KASSERT(p->pfrkt_shadow == NULL); + pfr_destroy_ktable(p, 0); + } +} + void pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) {