This diff optimizes subtree walks. In other words, it specifies a subtree (as a
prefix/prefixlen combination) and only walks the entries that fall under this
covering route.
Instead of doing a full table walk, this only walks part of the tree and is
therefore much faster if the subtree is small.

-- 
:wq Claudio

Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.575
diff -u -p -r1.575 rde.c
--- rde.c	9 Sep 2022 13:33:24 -0000	1.575
+++ rde.c	9 Sep 2022 14:46:03 -0000
@@ -2648,35 +2648,6 @@ rde_dump_upcall(struct rib_entry *re, vo
 }
 
 static void
-rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
-{
-	struct rde_dump_ctx	*ctx = ptr;
-	struct prefix		*p;
-	struct pt_entry		*pt;
-	struct bgpd_addr	 addr;
-
-	pt = re->prefix;
-	pt_getaddr(pt, &addr);
-	if (addr.aid != ctx->req.prefix.aid)
-		return;
-	if (ctx->req.flags & F_LONGER) {
-		if (ctx->req.prefixlen > pt->prefixlen)
-			return;
-		if (!prefix_compare(&ctx->req.prefix, &addr,
-		    ctx->req.prefixlen))
-			TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
-				rde_dump_filter(p, &ctx->req, 0);
-	} else {
-		if (ctx->req.prefixlen < pt->prefixlen)
-			return;
-		if (!prefix_compare(&addr, &ctx->req.prefix,
-		    pt->prefixlen))
-			TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
-				rde_dump_filter(p, &ctx->req, 0);
-	}
-}
-
-static void
 rde_dump_adjout_upcall(struct prefix *p, void *ptr)
 {
 	struct rde_dump_ctx	*ctx = ptr;
@@ -2688,35 +2659,6 @@ rde_dump_adjout_upcall(struct prefix *p,
 	rde_dump_filter(p, &ctx->req, 1);
 }
 
-static void
-rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
-{
-	struct rde_dump_ctx	*ctx = ptr;
-	struct bgpd_addr	 addr;
-
-	if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
-		fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
-	if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
-		return;
-
-	pt_getaddr(p->pt, &addr);
-	if (addr.aid != ctx->req.prefix.aid)
-		return;
-	if (ctx->req.flags & F_LONGER) {
-		if (ctx->req.prefixlen > p->pt->prefixlen)
-			return;
-		if (!prefix_compare(&ctx->req.prefix, &addr,
-		    ctx->req.prefixlen))
-			rde_dump_filter(p, &ctx->req, 1);
-	} else {
-		if (ctx->req.prefixlen < p->pt->prefixlen)
-			return;
-		if (!prefix_compare(&addr, &ctx->req.prefix,
-		    p->pt->prefixlen))
-			rde_dump_filter(p, &ctx->req, 1);
-	}
-}
-
 static int
 rde_dump_throttled(void *arg)
 {
@@ -2745,10 +2687,10 @@ rde_dump_done(void *arg, uint8_t aid)
 				goto nomem;
 			break;
 		case IMSG_CTL_SHOW_RIB_PREFIX:
-			if (prefix_dump_new(peer, ctx->req.aid,
-			    CTL_MSG_HIGH_MARK, ctx,
-			    rde_dump_adjout_prefix_upcall,
-			    rde_dump_done, rde_dump_throttled) == -1)
+			if (prefix_dump_subtree(peer, &ctx->req.prefix,
+			    ctx->req.prefixlen, CTL_MSG_HIGH_MARK, ctx,
+			    rde_dump_adjout_upcall, rde_dump_done,
+			    rde_dump_throttled) == -1)
 				goto nomem;
 			break;
 		default:
@@ -2817,9 +2759,9 @@ rde_dump_ctx_new(struct ctl_show_rib_req
 				break;
 			case IMSG_CTL_SHOW_RIB_PREFIX:
 				if (req->flags & F_LONGER) {
-					if (prefix_dump_new(peer, ctx->req.aid,
-					    CTL_MSG_HIGH_MARK, ctx,
-					    rde_dump_adjout_prefix_upcall,
+					if (prefix_dump_subtree(peer, &req->prefix,
+					    req->prefixlen, CTL_MSG_HIGH_MARK, ctx,
+					    rde_dump_adjout_upcall,
 					    rde_dump_done, rde_dump_throttled) == -1)
 						goto nomem;
 					break;
@@ -2900,8 +2842,8 @@ rde_dump_ctx_new(struct ctl_show_rib_req
 		break;
 	case IMSG_CTL_SHOW_RIB_PREFIX:
 		if (req->flags & F_LONGER) {
-			if (rib_dump_new(rid, ctx->req.aid,
-			    CTL_MSG_HIGH_MARK, ctx, rde_dump_prefix_upcall,
+			if (rib_dump_subtree(rid, &req->prefix, req->prefixlen,
+			    CTL_MSG_HIGH_MARK, ctx, rde_dump_upcall,
 			    rde_dump_done, rde_dump_throttled) == -1)
 				goto nomem;
 			break;
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.270
diff -u -p -r1.270 rde.h
--- rde.h	1 Sep 2022 13:23:24 -0000	1.270
+++ rde.h	9 Sep 2022 14:44:31 -0000
@@ -570,6 +570,11 @@ int	rib_dump_new(uint16_t, uint8_t, un
 		    void (*)(struct rib_entry *, void *),
 		    void (*)(void *, uint8_t), int (*)(void *));
+int		 rib_dump_subtree(uint16_t, struct bgpd_addr *, uint8_t,
+		    unsigned int count, void *arg,
+		    void (*)(struct rib_entry *, void *),
+		    void (*)(void *, uint8_t),
+		    int (*)(void *));
 void		 rib_dump_terminate(void *);
 
 static inline struct rib *
@@ -611,6 +616,10 @@ void	prefix_adjout_dump(struct rde_pee
 		    void (*)(struct prefix *, void *));
 int		 prefix_dump_new(struct rde_peer *, uint8_t, unsigned int, void *,
 		    void (*)(struct prefix *, void *),
+		    void (*)(void *, uint8_t), int (*)(void *));
+int		 prefix_dump_subtree(struct rde_peer *, struct bgpd_addr *,
+		    uint8_t, unsigned int, void *,
+		    void (*)(struct prefix *, void *),
 		    void (*)(void *, uint8_t), int (*)(void *));
 int		 prefix_write(u_char *, int, struct bgpd_addr *, uint8_t, int);
 int		 prefix_writebuf(struct ibuf *, struct bgpd_addr *, uint8_t);
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.248
diff -u -p -r1.248 rde_rib.c
--- rde_rib.c	1 Sep 2022 13:19:11 -0000	1.248
+++ rde_rib.c	9 Sep 2022 14:54:26 -0000
@@ -58,8 +58,10 @@ struct rib_context {
 	void		(*ctx_done)(void *, uint8_t);
 	int		(*ctx_throttle)(void *);
 	void		*ctx_arg;
+	struct bgpd_addr	 ctx_subtree;
 	unsigned int	 ctx_count;
 	uint8_t		 ctx_aid;
+	uint8_t		 ctx_subtreelen;
 };
 
 LIST_HEAD(, rib_context) rib_dumps = LIST_HEAD_INITIALIZER(rib_dumps);
@@ -396,16 +398,17 @@ rib_empty(struct rib_entry *re)
 static struct rib_entry *
 rib_restart(struct rib_context *ctx)
 {
-	struct rib_entry *re;
+	struct rib_entry *re = NULL;
 
-	re = re_unlock(ctx->ctx_re);
+	if (ctx->ctx_re)
+		re = re_unlock(ctx->ctx_re);
 
 	/* find first non empty element */
 	while (re && rib_empty(re))
 		re = RB_NEXT(rib_tree, unused, re);
 
 	/* free the previously locked rib element if empty */
-	if (rib_empty(ctx->ctx_re))
+	if (ctx->ctx_re && rib_empty(ctx->ctx_re))
 		rib_remove(ctx->ctx_re);
 	ctx->ctx_re = NULL;
 	return (re);
@@ -422,7 +425,7 @@ rib_dump_r(struct rib_context *ctx)
 	if (rib == NULL)
 		fatalx("%s: rib id %u gone", __func__, ctx->ctx_id);
 
-	if (ctx->ctx_re == NULL)
+	if (ctx->ctx_re == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
 		re = RB_MIN(rib_tree, rib_tree(rib));
 	else
 		re = rib_restart(ctx);
@@ -435,6 +438,14 @@
 		if (ctx->ctx_aid != AID_UNSPEC &&
 		    ctx->ctx_aid != re->prefix->aid)
 			continue;
+		if (ctx->ctx_subtree.aid != AID_UNSPEC) {
+			struct bgpd_addr addr;
+			pt_getaddr(re->prefix, &addr);
+			if (prefix_compare(&ctx->ctx_subtree, &addr,
+			    ctx->ctx_subtreelen) != 0)
+				/* left subtree, walk is done */
+				break;
+		}
 		if (ctx->ctx_count && i++ >= ctx->ctx_count &&
 		    !re_is_locked(re)) {
 			/* store and lock last element */
@@ -543,6 +554,42 @@ rib_dump_new(uint16_t id, uint8_t aid, u
 	return 0;
 }
 
+int
+rib_dump_subtree(uint16_t id, struct bgpd_addr *subtree, uint8_t subtreelen,
+    unsigned int count, void *arg, void (*upcall)(struct rib_entry *, void *),
+    void (*done)(void *, uint8_t), int (*throttle)(void *))
+{
+	struct rib_context *ctx;
+	struct rib_entry xre;
+
+	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
+		return -1;
+	ctx->ctx_id = id;
+	ctx->ctx_aid = subtree->aid;
+	ctx->ctx_count = count;
+	ctx->ctx_arg = arg;
+	ctx->ctx_rib_call = upcall;
+	ctx->ctx_done = done;
+	ctx->ctx_throttle = throttle;
+	ctx->ctx_subtree = *subtree;
+	ctx->ctx_subtreelen = subtreelen;
+
+	LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
+
+	/* lookup start of subtree */
+	memset(&xre, 0, sizeof(xre));
+	xre.prefix = pt_fill(subtree, subtreelen);
+	ctx->ctx_re = RB_NFIND(rib_tree, rib_tree(rib_byid(id)), &xre);
+	if (ctx->ctx_re)
+		re_lock(ctx->ctx_re);
+
+	/* requested a sync traversal */
+	if (count == 0)
+		rib_dump_r(ctx);
+
+	return 0;
+}
+
 /* path specific functions */
 
 static struct rde_aspath *path_lookup(struct rde_aspath *);
@@ -1253,11 +1300,12 @@ prefix_adjout_destroy(struct prefix *p)
 static struct prefix *
 prefix_restart(struct rib_context *ctx)
 {
-	struct prefix *p;
+	struct prefix *p = NULL;
 
-	p = prefix_unlock(ctx->ctx_p);
+	if (ctx->ctx_p)
+		p = prefix_unlock(ctx->ctx_p);
 
-	if (prefix_is_dead(p)) {
+	if (p && prefix_is_dead(p)) {
 		struct prefix *next;
 
 		next = RB_NEXT(prefix_index, unused, p);
@@ -1278,7 +1326,7 @@ prefix_dump_r(struct rib_context *ctx)
 	if ((peer = peer_get(ctx->ctx_id)) == NULL)
 		goto done;
 
-	if (ctx->ctx_p == NULL)
+	if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
 		p = RB_MIN(prefix_index, &peer->adj_rib_out);
 	else
 		p = prefix_restart(ctx);
@@ -1290,6 +1338,14 @@
 		if (ctx->ctx_aid != AID_UNSPEC &&
 		    ctx->ctx_aid != p->pt->aid)
 			continue;
+		if (ctx->ctx_subtree.aid != AID_UNSPEC) {
+			struct bgpd_addr addr;
+			pt_getaddr(p->pt, &addr);
+			if (prefix_compare(&ctx->ctx_subtree, &addr,
+			    ctx->ctx_subtreelen) != 0)
+				/* left subtree, walk is done */
+				break;
+		}
 		if (ctx->ctx_count && i++ >= ctx->ctx_count &&
 		    !prefix_is_locked(p)) {
 			/* store and lock last element */
@@ -1324,6 +1380,43 @@ prefix_dump_new(struct rde_peer *peer, u
 	ctx->ctx_throttle = throttle;
 
 	LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
+
+	/* requested a sync traversal */
+	if (count == 0)
+		prefix_dump_r(ctx);
+
+	return 0;
+}
+
+int
+prefix_dump_subtree(struct rde_peer *peer, struct bgpd_addr *subtree,
+    uint8_t subtreelen, unsigned int count, void *arg,
+    void (*upcall)(struct prefix *, void *), void (*done)(void *, uint8_t),
+    int (*throttle)(void *))
+{
+	struct rib_context *ctx;
+	struct prefix xp;
+
+	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
+		return -1;
+	ctx->ctx_id = peer->conf.id;
+	ctx->ctx_aid = subtree->aid;
+	ctx->ctx_count = count;
+	ctx->ctx_arg = arg;
+	ctx->ctx_prefix_call = upcall;
+	ctx->ctx_done = done;
+	ctx->ctx_throttle = throttle;
+	ctx->ctx_subtree = *subtree;
+	ctx->ctx_subtreelen = subtreelen;
+
+	LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
+
+	/* lookup start of subtree */
+	memset(&xp, 0, sizeof(xp));
+	xp.pt = pt_fill(subtree, subtreelen);
+	ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
+	if (ctx->ctx_p)
+		prefix_lock(ctx->ctx_p);
 
 	/* requested a sync traversal */
 	if (count == 0)
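
[Note added for illustration, not part of the diff.] Both new functions follow
the same pattern: seed the walk with RB_NFIND() at the first entry covered by
the requested prefix, then stop as soon as prefix_compare() reports that an
entry no longer shares the covering prefix. The stand-alone sketch below shows
that pattern under simplified assumptions; the tree layout and the names
(struct node, prefix_equal, subtree_walk) are invented for this example
(canonical IPv4 prefixes only, host byte order) and are not bgpd code.

#include <sys/tree.h>
#include <stddef.h>
#include <stdint.h>

struct node {
	RB_ENTRY(node)	 entry;
	uint32_t	 prefix;	/* canonical IPv4 prefix, host byte order */
	uint8_t		 prefixlen;
};

/* order by address first, shorter prefixes first on ties */
static int
node_cmp(struct node *a, struct node *b)
{
	if (a->prefix != b->prefix)
		return a->prefix < b->prefix ? -1 : 1;
	return (int)a->prefixlen - (int)b->prefixlen;
}

RB_HEAD(node_tree, node)	tree = RB_INITIALIZER(&tree);
RB_GENERATE(node_tree, node, entry, node_cmp);

/* do the first plen bits of a and b match? */
static int
prefix_equal(uint32_t a, uint32_t b, uint8_t plen)
{
	uint32_t mask = plen ? 0xffffffffU << (32 - plen) : 0;

	return (a & mask) == (b & mask);
}

/*
 * Walk only the entries covered by subtree/subtreelen: RB_NFIND()
 * jumps to the first entry that sorts at or after the covering route,
 * and the loop stops at the first entry outside of it.
 */
static void
subtree_walk(uint32_t subtree, uint8_t subtreelen,
    void (*upcall)(struct node *, void *), void *arg)
{
	struct node	 key = { .prefix = subtree, .prefixlen = subtreelen };
	struct node	*n;

	for (n = RB_NFIND(node_tree, &tree, &key); n != NULL;
	    n = RB_NEXT(node_tree, &tree, n)) {
		if (!prefix_equal(subtree, n->prefix, subtreelen))
			break;		/* left the subtree, walk is done */
		upcall(n, arg);
	}
}

Because the tree is ordered by address (with prefixlen breaking ties), all
entries under the covering route are contiguous, so the walk can stop at the
first mismatch instead of scanning the whole table.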