On large bgpd instances the hash tables used for rde_aspath, aspath and communities get overloaded to the point that aspath_get() consumes a large amount of CPU time.
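For context, the cache lookup being removed hashes the raw AS-path bytes with SipHash to pick a bucket and then walks that bucket's list with one memcmp() per entry; since the bucket count is fixed at startup, the chains (and with them each aspath_get() call) grow linearly with the number of cached paths on a large instance. The pattern, condensed from the aspath_lookup()/ASPATH_HASH() code deleted further down in the diff (a sketch, not a drop-in excerpt):

struct aspath *
aspath_lookup(const void *data, uint16_t len)
{
	struct aspath_list	*head;
	struct aspath		*aspath;

	/* pick a bucket: SipHash over the raw AS-path bytes */
	head = ASPATH_HASH(SipHash24(&astablekey, data, len));

	/* then scan the whole chain linearly, one memcmp() per entry */
	LIST_FOREACH(aspath, head, entry) {
		if (len == aspath->len &&
		    memcmp(data, aspath->data, len) == 0)
			return (aspath);
	}
	return (NULL);
}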
This diff improves the situation by a) using an RB tree for rde_aspath and communities and b) dropping the hash table for aspath altogether. In most cases the memory saving of the aspath cache does not justify the extra CPU the lookups consume (even when using an RB tree, aspath_get() is so hot that it uses 30-40% CPU).

This is one big diff, but if people prefer I can split it up.

Tested on the route collector with 80 million prefixes (where this diff has a noticeable effect).

--
:wq Claudio

Index: usr.sbin/bgpctl/output.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v retrieving revision 1.26 diff -u -p -r1.26 output.c --- usr.sbin/bgpctl/output.c 10 Aug 2022 10:21:47 -0000 1.26 +++ usr.sbin/bgpctl/output.c 25 Aug 2022 13:31:27 -0000 @@ -221,13 +221,16 @@ show_neighbor_msgstats(struct peer *p) p->stats.msg_rcvd_update + p->stats.msg_rcvd_keepalive + p->stats.msg_rcvd_rrefresh); printf(" Update statistics:\n"); - printf(" %-15s %-10s %-10s\n", "", "Sent", "Received"); + printf(" %-15s %-10s %-10s %-10s\n", "", "Sent", "Received", + "Pending"); printf(" %-15s %10u %10u\n", "Prefixes", p->stats.prefix_out_cnt, p->stats.prefix_cnt); - printf(" %-15s %10llu %10llu\n", "Updates", - p->stats.prefix_sent_update, p->stats.prefix_rcvd_update); - printf(" %-15s %10llu %10llu\n", "Withdraws", - p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw); + printf(" %-15s %10llu %10llu %10u\n", "Updates", + p->stats.prefix_sent_update, p->stats.prefix_rcvd_update, + p->stats.pending_update); + printf(" %-15s %10llu %10llu %10u\n", "Withdraws", + p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw, + p->stats.pending_withdraw); printf(" %-15s %10llu %10llu\n", "End-of-Rib", p->stats.prefix_sent_eor, p->stats.prefix_rcvd_eor); printf(" Route Refresh statistics:\n"); @@ -1000,9 +1003,7 @@ show_rib_mem(struct rde_memstats *stats) printf("\t and holding %lld references\n", stats->path_refs); printf("%10lld BGP AS-PATH attribute entries using " - "%s of memory\n\t and holding %lld references\n", - stats->aspath_cnt, fmt_mem(stats->aspath_size), - stats->aspath_refs); + "%s of memory\n", stats->aspath_cnt, fmt_mem(stats->aspath_size)); printf("%10lld entries for %lld BGP communities " "using %s of memory\n", stats->comm_cnt, stats->comm_nmemb, fmt_mem(stats->comm_cnt * sizeof(struct rde_community) + Index: usr.sbin/bgpctl/output_json.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v retrieving revision 1.20 diff -u -p -r1.20 output_json.c --- usr.sbin/bgpctl/output_json.c 28 Jul 2022 10:40:25 -0000 1.20 +++ usr.sbin/bgpctl/output_json.c 25 Aug 2022 13:31:14 -0000 @@ -190,6 +190,11 @@ json_neighbor_stats(struct peer *p) json_do_uint("eor", p->stats.prefix_rcvd_eor); json_do_end(); + json_do_object("pending"); + json_do_uint("updates", p->stats.pending_update); + json_do_uint("withdraws", p->stats.pending_withdraw); + json_do_end(); + json_do_end(); json_do_object("route-refresh"); @@ -931,7 +936,7 @@ json_rib_mem(struct rde_memstats *stats) stats->path_cnt * sizeof(struct rde_aspath), stats->path_refs); json_rib_mem_element("aspath", stats->aspath_cnt, - stats->aspath_size, stats->aspath_refs); + stats->aspath_size, UINT64_MAX); json_rib_mem_element("community_entries", stats->comm_cnt, stats->comm_cnt * sizeof(struct rde_community), UINT64_MAX); json_rib_mem_element("community", stats->comm_nmemb, Index: usr.sbin/bgpd/bgpd.h
=================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v retrieving revision 1.450 diff -u -p -r1.450 bgpd.h --- usr.sbin/bgpd/bgpd.h 26 Aug 2022 14:10:52 -0000 1.450 +++ usr.sbin/bgpd/bgpd.h 29 Aug 2022 12:07:01 -0000 @@ -1193,7 +1193,6 @@ struct rde_memstats { long long nexthop_cnt; long long aspath_cnt; long long aspath_size; - long long aspath_refs; long long comm_cnt; long long comm_nmemb; long long comm_size; Index: usr.sbin/bgpd/rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v retrieving revision 1.565 diff -u -p -r1.565 rde.c --- usr.sbin/bgpd/rde.c 26 Aug 2022 14:10:52 -0000 1.565 +++ usr.sbin/bgpd/rde.c 29 Aug 2022 12:09:36 -0000 @@ -197,9 +197,6 @@ rde_main(int debug, int verbose) /* initialize the RIB structures */ pt_init(); - path_init(pathhashsize); - aspath_init(pathhashsize); - communities_init(attrhashsize); attr_init(attrhashsize); nexthop_init(nexthophashsize); peer_init(peerhashsize); @@ -407,7 +404,6 @@ rde_dispatch_imsg_session(struct imsgbuf break; } if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup)) - fatalx("incorrect size of session request"); memcpy(&sup, imsg.data, sizeof(sup)); peer_up(peer, &sup); /* make sure rde_eval_all is on if needed. */ @@ -623,6 +619,8 @@ badnetdel: peer->prefix_sent_withdraw; p.stats.prefix_sent_eor = peer->prefix_sent_eor; + p.stats.pending_update = peer->up_nlricnt; + p.stats.pending_withdraw = peer->up_wcnt; } imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0, imsg.hdr.pid, -1, &p, sizeof(struct peer)); @@ -630,15 +628,6 @@ badnetdel: case IMSG_CTL_SHOW_RIB_MEM: imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0, imsg.hdr.pid, -1, &rdemem, sizeof(rdemem)); - path_hash_stats(&rdehash); - imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0, - imsg.hdr.pid, -1, &rdehash, sizeof(rdehash)); - aspath_hash_stats(&rdehash); - imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0, - imsg.hdr.pid, -1, &rdehash, sizeof(rdehash)); - communities_hash_stats(&rdehash); - imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0, - imsg.hdr.pid, -1, &rdehash, sizeof(rdehash)); attr_hash_stats(&rdehash); imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0, imsg.hdr.pid, -1, &rdehash, sizeof(rdehash)); @@ -1637,7 +1626,9 @@ pathid_assign(struct rde_peer *peer, uin struct prefix *p = NULL; uint32_t path_id_tx; - /* Assign a send side path_id to all paths */ + /* + * Assign a send side path_id to all paths. + */ re = rib_get(rib_byid(RIB_ADJ_IN), prefix, prefixlen); if (re != NULL) p = prefix_bypeer(re, peer, path_id); @@ -4314,7 +4305,6 @@ rde_shutdown(void) rib_shutdown(); nexthop_shutdown(); path_shutdown(); - aspath_shutdown(); attr_shutdown(); pt_shutdown(); peer_shutdown(); Index: usr.sbin/bgpd/rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v retrieving revision 1.263 diff -u -p -r1.263 rde.h --- usr.sbin/bgpd/rde.h 26 Aug 2022 14:10:52 -0000 1.263 +++ usr.sbin/bgpd/rde.h 29 Aug 2022 12:07:01 -0000 @@ -71,9 +71,7 @@ struct rib { * Currently I assume that we can do that with the neighbor_ip... 
*/ LIST_HEAD(rde_peer_head, rde_peer); -LIST_HEAD(aspath_list, aspath); LIST_HEAD(attr_list, attr); -LIST_HEAD(aspath_head, rde_aspath); RB_HEAD(prefix_tree, prefix); RB_HEAD(prefix_index, prefix); struct iq; @@ -123,9 +121,7 @@ struct rde_peer { #define ASPATH_HEADER_SIZE (offsetof(struct aspath, data)) struct aspath { - LIST_ENTRY(aspath) entry; uint32_t source_as; /* cached source_as */ - int refcnt; /* reference count */ uint16_t len; /* total length of aspath in octets */ uint16_t ascnt; /* number of AS hops in data */ u_char data[1]; /* placeholder for actual data */ @@ -183,9 +179,9 @@ struct mpattr { }; struct rde_community { - LIST_ENTRY(rde_community) entry; - size_t size; - size_t nentries; + RB_ENTRY(rde_community) entry; + int size; + int nentries; int flags; int refcnt; struct community *communities; @@ -219,20 +215,18 @@ struct rde_community { #define DEFAULT_LPREF 100 struct rde_aspath { - LIST_ENTRY(rde_aspath) path_l; + RB_ENTRY(rde_aspath) entry; struct attr **others; struct aspath *aspath; uint64_t hash; int refcnt; uint32_t flags; /* internally used */ -#define aspath_hashstart med uint32_t med; /* multi exit disc */ uint32_t lpref; /* local pref */ uint32_t weight; /* low prio lpref */ uint16_t rtlabelid; /* route label id */ uint16_t pftableid; /* pf table id */ uint8_t origin; -#define aspath_hashend others_len uint8_t others_len; }; @@ -449,10 +443,8 @@ void attr_free(struct rde_aspath *, st #define attr_optlen(x) \ ((x)->len > 255 ? (x)->len + 4 : (x)->len + 3) -void aspath_init(uint32_t); -void aspath_shutdown(void); -void aspath_hash_stats(struct rde_hashstats *); struct aspath *aspath_get(void *, uint16_t); +struct aspath *aspath_copy(struct aspath *); void aspath_put(struct aspath *); u_char *aspath_deflate(u_char *, uint16_t *, int *); void aspath_merge(struct rde_aspath *, struct attr *); @@ -486,7 +478,6 @@ int community_large_write(struct rde_com int community_ext_write(struct rde_community *, int, void *, uint16_t); int community_writebuf(struct ibuf *, struct rde_community *); -void communities_init(uint32_t); void communities_shutdown(void); void communities_hash_stats(struct rde_hashstats *); struct rde_community *communities_lookup(struct rde_community *); @@ -594,10 +585,7 @@ re_rib(struct rib_entry *re) return rib_byid(re->rib_id); } -void path_init(uint32_t); void path_shutdown(void); -void path_hash_stats(struct rde_hashstats *); -int path_compare(struct rde_aspath *, struct rde_aspath *); uint32_t path_remove_stale(struct rde_aspath *, uint8_t, time_t); struct rde_aspath *path_copy(struct rde_aspath *, const struct rde_aspath *); struct rde_aspath *path_prep(struct rde_aspath *); Index: usr.sbin/bgpd/rde_attr.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_attr.c,v retrieving revision 1.127 diff -u -p -r1.127 rde_attr.c --- usr.sbin/bgpd/rde_attr.c 28 Jul 2022 13:11:51 -0000 1.127 +++ usr.sbin/bgpd/rde_attr.c 24 Aug 2022 08:16:02 -0000 @@ -446,102 +446,57 @@ static uint32_t aspath_extract_origin(co static uint16_t aspath_countlength(struct aspath *, uint16_t, int); static void aspath_countcopy(struct aspath *, uint16_t, uint8_t *, uint16_t, int); -struct aspath *aspath_lookup(const void *, uint16_t); -struct aspath_table { - struct aspath_list *hashtbl; - uint32_t hashmask; -} astable; - -SIPHASH_KEY astablekey; - -#define ASPATH_HASH(x) \ - &astable.hashtbl[(x) & astable.hashmask] - -void -aspath_init(uint32_t hashsize) +int +aspath_compare(struct aspath *a1, struct aspath *a2) { - 
uint32_t hs, i; - - for (hs = 1; hs < hashsize; hs <<= 1) - ; - astable.hashtbl = calloc(hs, sizeof(struct aspath_list)); - if (astable.hashtbl == NULL) - fatal("aspath_init"); - - for (i = 0; i < hs; i++) - LIST_INIT(&astable.hashtbl[i]); + int r; - astable.hashmask = hs - 1; - arc4random_buf(&astablekey, sizeof(astablekey)); + if (a1->len > a2->len) + return (1); + if (a1->len < a2->len) + return (-1); + r = memcmp(a1->data, a2->data, a1->len); + if (r > 0) + return (1); + if (r < 0) + return (-1); + return (0); } -void -aspath_shutdown(void) +struct aspath * +aspath_get(void *data, uint16_t len) { - uint32_t i; + struct aspath *aspath; - for (i = 0; i <= astable.hashmask; i++) - if (!LIST_EMPTY(&astable.hashtbl[i])) - log_warnx("aspath_shutdown: free non-free table"); + aspath = malloc(ASPATH_HEADER_SIZE + len); + if (aspath == NULL) + fatal("aspath_get"); - free(astable.hashtbl); -} + rdemem.aspath_cnt++; + rdemem.aspath_size += ASPATH_HEADER_SIZE + len; -void -aspath_hash_stats(struct rde_hashstats *hs) -{ - struct aspath *a; - uint32_t i; - int64_t n; - - memset(hs, 0, sizeof(*hs)); - strlcpy(hs->name, "aspath hash", sizeof(hs->name)); - hs->min = LLONG_MAX; - hs->num = astable.hashmask + 1; - - for (i = 0; i <= astable.hashmask; i++) { - n = 0; - LIST_FOREACH(a, &astable.hashtbl[i], entry) - n++; - if (n < hs->min) - hs->min = n; - if (n > hs->max) - hs->max = n; - hs->sum += n; - hs->sumq += n * n; - } + aspath->len = len; + aspath->ascnt = aspath_count(data, len); + aspath->source_as = aspath_extract_origin(data, len); + memcpy(aspath->data, data, len); + + return (aspath); } struct aspath * -aspath_get(void *data, uint16_t len) +aspath_copy(struct aspath *a) { - struct aspath_list *head; struct aspath *aspath; - /* The aspath must already have been checked for correctness. 
*/ - aspath = aspath_lookup(data, len); - if (aspath == NULL) { - aspath = malloc(ASPATH_HEADER_SIZE + len); - if (aspath == NULL) - fatal("aspath_get"); - - rdemem.aspath_cnt++; - rdemem.aspath_size += ASPATH_HEADER_SIZE + len; - - aspath->refcnt = 0; - aspath->len = len; - aspath->ascnt = aspath_count(data, len); - aspath->source_as = aspath_extract_origin(data, len); - memcpy(aspath->data, data, len); - - /* link */ - head = ASPATH_HASH(SipHash24(&astablekey, aspath->data, - aspath->len)); - LIST_INSERT_HEAD(head, aspath, entry); - } - aspath->refcnt++; - rdemem.aspath_refs++; + aspath = malloc(ASPATH_HEADER_SIZE + a->len); + if (aspath == NULL) + fatal("aspath_get"); + + rdemem.aspath_cnt++; + rdemem.aspath_size += ASPATH_HEADER_SIZE + a->len; + + memcpy(aspath, a, ASPATH_HEADER_SIZE + a->len); return (aspath); } @@ -552,15 +507,6 @@ aspath_put(struct aspath *aspath) if (aspath == NULL) return; - rdemem.aspath_refs--; - if (--aspath->refcnt > 0) { - /* somebody still holds a reference */ - return; - } - - /* unlink */ - LIST_REMOVE(aspath, entry); - rdemem.aspath_cnt--; rdemem.aspath_size -= ASPATH_HEADER_SIZE + aspath->len; free(aspath); @@ -849,41 +795,6 @@ aspath_loopfree(struct aspath *aspath, u } return (1); } - -int -aspath_compare(struct aspath *a1, struct aspath *a2) -{ - int r; - - if (a1->len > a2->len) - return (1); - if (a1->len < a2->len) - return (-1); - r = memcmp(a1->data, a2->data, a1->len); - if (r > 0) - return (1); - if (r < 0) - return (-1); - return (0); -} - -struct aspath * -aspath_lookup(const void *data, uint16_t len) -{ - struct aspath_list *head; - struct aspath *aspath; - uint32_t hash; - - hash = SipHash24(&astablekey, data, len); - head = ASPATH_HASH(hash); - - LIST_FOREACH(aspath, head, entry) { - if (len == aspath->len && memcmp(data, aspath->data, len) == 0) - return (aspath); - } - return (NULL); -} - static int as_compare(struct filter_as *f, uint32_t as, uint32_t neighas) Index: usr.sbin/bgpd/rde_community.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v retrieving revision 1.7 diff -u -p -r1.7 rde_community.c --- usr.sbin/bgpd/rde_community.c 28 Jul 2022 13:11:51 -0000 1.7 +++ usr.sbin/bgpd/rde_community.c 24 Aug 2022 12:14:57 -0000 @@ -209,12 +209,12 @@ mask_match(struct community *a, struct c static void insert_community(struct rde_community *comm, struct community *c) { - size_t l; + int l; int r; if (comm->nentries + 1 > comm->size) { struct community *new; - size_t newsize = comm->size + 8; + int newsize = comm->size + 8; if ((new = reallocarray(comm->communities, newsize, sizeof(struct community))) == NULL) @@ -261,7 +261,7 @@ community_match(struct rde_community *co struct rde_peer *peer) { struct community test, mask; - size_t l; + int l; if (fc->flags >> 8 == 0) { /* fast path */ @@ -288,7 +288,7 @@ struct rde_peer *peer) int community_count(struct rde_community *comm, uint8_t type) { - size_t l; + int l; int count = 0; /* use the fact that the array is ordered by type */ @@ -351,7 +351,7 @@ struct rde_peer *peer) { struct community test, mask; struct community *match; - size_t l = 0; + int l = 0; if (fc->flags >> 8 == 0) { /* fast path */ @@ -501,8 +501,8 @@ community_write(struct rde_community *co { uint8_t *b = buf; uint16_t c; - size_t l, n = 0; - int r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; + size_t n = 0; + int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; if (comm->flags & PARTIAL_COMMUNITIES) flags |= ATTR_PARTIAL; @@ -545,8 +545,8 @@ 
community_large_write(struct rde_communi { uint8_t *b = buf; uint32_t c; - size_t l, n = 0; - int r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; + size_t n = 0; + int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; if (comm->flags & PARTIAL_LARGE_COMMUNITIES) flags |= ATTR_PARTIAL; @@ -596,8 +596,8 @@ community_ext_write(struct rde_community struct community *cp; uint8_t *b = buf; uint64_t ext; - size_t l, n = 0; - int r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; + size_t n = 0; + int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; if (comm->flags & PARTIAL_EXT_COMMUNITIES) flags |= ATTR_PARTIAL; @@ -654,8 +654,8 @@ community_ext_write(struct rde_community int community_writebuf(struct ibuf *buf, struct rde_community *comm) { - size_t l, basic_n = 0, large_n = 0, ext_n = 0; - int flags; + size_t basic_n = 0, large_n = 0, ext_n = 0; + int l, flags; /* first count how many communities will be written */ for (l = 0; l < comm->nentries; l++) @@ -764,113 +764,44 @@ community_writebuf(struct ibuf *buf, str /* * Global RIB cache for communities */ -LIST_HEAD(commhead, rde_community); - -static struct comm_table { - struct commhead *hashtbl; - uint64_t hashmask; -} commtable; - -static SIPHASH_KEY commtablekey; - -static inline struct commhead * -communities_hash(struct rde_community *comm) -{ - SIPHASH_CTX ctx; - uint64_t hash; - - SipHash24_Init(&ctx, &commtablekey); - SipHash24_Update(&ctx, &comm->nentries, sizeof(comm->nentries)); - SipHash24_Update(&ctx, &comm->flags, sizeof(comm->flags)); - if (comm->nentries > 0) - SipHash24_Update(&ctx, comm->communities, - comm->nentries * sizeof(*comm->communities)); - hash = SipHash24_End(&ctx); - - return &commtable.hashtbl[hash & commtable.hashmask]; -} - -void -communities_init(uint32_t hashsize) +static inline int +communities_compare(struct rde_community *a, struct rde_community *b) { - uint32_t hs, i; + if (a->nentries != b->nentries) + return a->nentries - b->nentries; + if (a->flags != b->flags) + return a->flags - b->flags; - arc4random_buf(&commtablekey, sizeof(commtablekey)); - for (hs = 1; hs < hashsize; hs <<= 1) - ; - commtable.hashtbl = calloc(hs, sizeof(*commtable.hashtbl)); - if (commtable.hashtbl == NULL) - fatal(__func__); - - for (i = 0; i < hs; i++) - LIST_INIT(&commtable.hashtbl[i]); - commtable.hashmask = hs - 1; + return memcmp(a->communities, b->communities, + a->nentries * sizeof(struct community)); } -void -communities_shutdown(void) -{ - uint64_t i; - - for (i = 0; i <= commtable.hashmask; i++) - if (!LIST_EMPTY(&commtable.hashtbl[i])) - log_warnx("%s: free non-free table", __func__); - - free(commtable.hashtbl); -} +RB_HEAD(comm_tree, rde_community) commtable = RB_INITIALIZER(&commtable); +RB_GENERATE_STATIC(comm_tree, rde_community, entry, communities_compare); void -communities_hash_stats(struct rde_hashstats *hs) +communities_shutdown(void) { - struct rde_community *c; - uint64_t i; - int64_t n; - - memset(hs, 0, sizeof(*hs)); - strlcpy(hs->name, "comm hash", sizeof(hs->name)); - hs->min = LLONG_MAX; - hs->num = commtable.hashmask + 1; - - for (i = 0; i <= commtable.hashmask; i++) { - n = 0; - LIST_FOREACH(c, &commtable.hashtbl[i], entry) - n++; - if (n < hs->min) - hs->min = n; - if (n > hs->max) - hs->max = n; - hs->sum += n; - hs->sumq += n * n; - } + if (!RB_EMPTY(&commtable)) + log_warnx("%s: free non-free table", __func__); } struct rde_community * communities_lookup(struct rde_community *comm) { - struct rde_community *c; - struct commhead *head; - - head = communities_hash(comm); - LIST_FOREACH(c, head, entry) { - if 
(communities_equal(comm, c)) - return c; - } - return NULL; + return RB_FIND(comm_tree, &commtable, comm); } struct rde_community * communities_link(struct rde_community *comm) { struct rde_community *n; - struct commhead *head; if ((n = malloc(sizeof(*n))) == NULL) fatal(__func__); - communities_copy(n, comm); - head = communities_hash(n); - LIST_INSERT_HEAD(head, n, entry); + RB_INSERT(comm_tree, &commtable, n); n->refcnt = 1; /* initial reference by the cache */ rdemem.comm_size += n->size; @@ -886,7 +817,7 @@ communities_unlink(struct rde_community if (comm->refcnt != 1) fatalx("%s: unlinking still referenced communities", __func__); - LIST_REMOVE(comm, entry); + RB_REMOVE(comm_tree, &commtable, comm); rdemem.comm_size -= comm->size; rdemem.comm_nmemb -= comm->nentries; Index: usr.sbin/bgpd/rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v retrieving revision 1.244 diff -u -p -r1.244 rde_rib.c --- usr.sbin/bgpd/rde_rib.c 25 Aug 2022 08:10:25 -0000 1.244 +++ usr.sbin/bgpd/rde_rib.c 26 Aug 2022 07:38:27 -0000 @@ -547,99 +547,10 @@ rib_dump_new(uint16_t id, uint8_t aid, u /* path specific functions */ static struct rde_aspath *path_lookup(struct rde_aspath *); -static uint64_t path_hash(struct rde_aspath *); static void path_link(struct rde_aspath *); static void path_unlink(struct rde_aspath *); -struct path_table { - struct aspath_head *path_hashtbl; - uint64_t path_hashmask; -} pathtable; - -SIPHASH_KEY pathtablekey; - -#define PATH_HASH(x) &pathtable.path_hashtbl[x & pathtable.path_hashmask] - -static inline struct rde_aspath * -path_ref(struct rde_aspath *asp) -{ - if ((asp->flags & F_ATTR_LINKED) == 0) - fatalx("%s: unlinked object", __func__); - asp->refcnt++; - rdemem.path_refs++; - - return asp; -} - -static inline void -path_unref(struct rde_aspath *asp) -{ - if (asp == NULL) - return; - if ((asp->flags & F_ATTR_LINKED) == 0) - fatalx("%s: unlinked object", __func__); - asp->refcnt--; - rdemem.path_refs--; - if (asp->refcnt <= 0) - path_unlink(asp); -} - -void -path_init(uint32_t hashsize) -{ - uint32_t hs, i; - - for (hs = 1; hs < hashsize; hs <<= 1) - ; - pathtable.path_hashtbl = calloc(hs, sizeof(*pathtable.path_hashtbl)); - if (pathtable.path_hashtbl == NULL) - fatal("path_init"); - - for (i = 0; i < hs; i++) - LIST_INIT(&pathtable.path_hashtbl[i]); - - pathtable.path_hashmask = hs - 1; - arc4random_buf(&pathtablekey, sizeof(pathtablekey)); -} - -void -path_shutdown(void) -{ - uint32_t i; - - for (i = 0; i <= pathtable.path_hashmask; i++) - if (!LIST_EMPTY(&pathtable.path_hashtbl[i])) - log_warnx("path_free: free non-free table"); - - free(pathtable.path_hashtbl); -} - -void -path_hash_stats(struct rde_hashstats *hs) -{ - struct rde_aspath *a; - uint32_t i; - int64_t n; - - memset(hs, 0, sizeof(*hs)); - strlcpy(hs->name, "path hash", sizeof(hs->name)); - hs->min = LLONG_MAX; - hs->num = pathtable.path_hashmask + 1; - - for (i = 0; i <= pathtable.path_hashmask; i++) { - n = 0; - LIST_FOREACH(a, &pathtable.path_hashtbl[i], path_l) - n++; - if (n < hs->min) - hs->min = n; - if (n > hs->max) - hs->max = n; - hs->sum += n; - hs->sumq += n * n; - } -} - -int +static inline int path_compare(struct rde_aspath *a, struct rde_aspath *b) { int r; @@ -688,40 +599,44 @@ path_compare(struct rde_aspath *a, struc return (attr_compare(a, b)); } -static uint64_t -path_hash(struct rde_aspath *asp) -{ - SIPHASH_CTX ctx; - uint64_t hash; +RB_HEAD(path_tree, rde_aspath) pathtable = RB_INITIALIZER(&pathtable); 
+RB_GENERATE_STATIC(path_tree, rde_aspath, entry, path_compare); - SipHash24_Init(&ctx, &pathtablekey); - SipHash24_Update(&ctx, &asp->aspath_hashstart, - (char *)&asp->aspath_hashend - (char *)&asp->aspath_hashstart); +static inline struct rde_aspath * +path_ref(struct rde_aspath *asp) +{ + if ((asp->flags & F_ATTR_LINKED) == 0) + fatalx("%s: unlinked object", __func__); + asp->refcnt++; + rdemem.path_refs++; - if (asp->aspath) - SipHash24_Update(&ctx, asp->aspath->data, asp->aspath->len); + return asp; +} - hash = attr_hash(asp); - SipHash24_Update(&ctx, &hash, sizeof(hash)); +static inline void +path_unref(struct rde_aspath *asp) +{ + if (asp == NULL) + return; + if ((asp->flags & F_ATTR_LINKED) == 0) + fatalx("%s: unlinked object", __func__); + asp->refcnt--; + rdemem.path_refs--; + if (asp->refcnt <= 0) + path_unlink(asp); +} - return (SipHash24_End(&ctx)); +void +path_shutdown(void) +{ + if (!RB_EMPTY(&pathtable)) + log_warnx("path_free: free non-free table"); } static struct rde_aspath * path_lookup(struct rde_aspath *aspath) { - struct aspath_head *head; - struct rde_aspath *asp; - uint64_t hash; - - hash = path_hash(aspath); - head = PATH_HASH(hash); - - LIST_FOREACH(asp, head, path_l) { - if (asp->hash == hash && path_compare(aspath, asp) == 0) - return (asp); - } - return (NULL); + return (RB_FIND(path_tree, &pathtable, aspath)); } /* @@ -731,12 +646,7 @@ path_lookup(struct rde_aspath *aspath) static void path_link(struct rde_aspath *asp) { - struct aspath_head *head; - - asp->hash = path_hash(asp); - head = PATH_HASH(asp->hash); - - LIST_INSERT_HEAD(head, asp, path_l); + RB_INSERT(path_tree, &pathtable, asp); asp->flags |= F_ATTR_LINKED; } @@ -754,7 +664,7 @@ path_unlink(struct rde_aspath *asp) if (asp->refcnt != 0) fatalx("%s: still holds references", __func__); - LIST_REMOVE(asp, path_l); + RB_REMOVE(path_tree, &pathtable, asp); asp->flags &= ~F_ATTR_LINKED; path_put(asp); @@ -767,11 +677,7 @@ path_unlink(struct rde_aspath *asp) struct rde_aspath * path_copy(struct rde_aspath *dst, const struct rde_aspath *src) { - dst->aspath = src->aspath; - if (dst->aspath != NULL) { - dst->aspath->refcnt++; - rdemem.aspath_refs++; - } + dst->aspath = aspath_copy(src->aspath); dst->hash = 0; /* not linked so no hash and no refcnt */ dst->refcnt = 0; dst->flags = src->flags & ~F_ATTR_LINKED; @@ -823,6 +729,7 @@ path_clean(struct rde_aspath *asp) rtlabel_unref(asp->rtlabelid); pftable_unref(asp->pftableid); aspath_put(asp->aspath); + asp->aspath = NULL; attr_freeall(asp); } Index: usr.sbin/bgpd/session.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/session.h,v retrieving revision 1.157 diff -u -p -r1.157 session.h --- usr.sbin/bgpd/session.h 28 Jul 2022 13:11:51 -0000 1.157 +++ usr.sbin/bgpd/session.h 25 Aug 2022 13:23:17 -0000 @@ -179,6 +179,8 @@ struct peer_stats { time_t last_write; uint32_t prefix_cnt; uint32_t prefix_out_cnt; + uint32_t pending_update; + uint32_t pending_withdraw; uint8_t last_sent_errcode; uint8_t last_sent_suberr; uint8_t last_rcvd_errcode;
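
For readers who have not used <sys/tree.h> before: the diff replaces each hash table with the same pattern, namely an embedded RB_ENTRY in the cached struct, a three-way compare function, RB_GENERATE_STATIC, and RB_FIND/RB_INSERT/RB_REMOVE for lookup, link and unlink. A stand-alone toy version of that pattern (the "node" type and its integer key are made up for illustration; this is not bgpd code):

#include <sys/tree.h>
#include <stdlib.h>

struct node {
	RB_ENTRY(node)	 entry;		/* embedded tree linkage */
	int		 key;
};

static inline int
node_cmp(struct node *a, struct node *b)
{
	/* must be a total order returning <0, 0 or >0, like memcmp() */
	return (a->key < b->key ? -1 : a->key > b->key);
}

RB_HEAD(node_tree, node) nodes = RB_INITIALIZER(&nodes);
RB_GENERATE_STATIC(node_tree, node, entry, node_cmp);

static struct node *
node_lookup(int key)
{
	struct node needle = { .key = key };

	/* O(log n) search; replaces the hash-bucket list walk */
	return (RB_FIND(node_tree, &nodes, &needle));
}

static struct node *
node_link(int key)
{
	struct node *n;

	if ((n = calloc(1, sizeof(*n))) == NULL)
		return (NULL);
	n->key = key;
	RB_INSERT(node_tree, &nodes, n);	/* returns the duplicate, if one exists */
	return (n);
}

static void
node_unlink(struct node *n)
{
	RB_REMOVE(node_tree, &nodes, n);
	free(n);
}

Lookups become O(log n) instead of a chain walk whose length scales with the table load, at the cost of a pointer-heavier node and a compare function that must define a total order.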
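The less visible part of dropping the aspath cache is the ownership change: struct aspath is no longer a shared, refcounted cache entry. Every rde_aspath now owns a private heap copy, path_copy() deep-copies it via the new aspath_copy(), and aspath_put() is a plain free. A trimmed-down restatement of that model (types and bookkeeping reduced from rde.h/rde_attr.c; the real code fatal()s on allocation failure and updates the rdemem counters):

#include <sys/types.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct aspath {
	uint32_t	 source_as;	/* cached source AS */
	uint16_t	 len;		/* length of the encoded path in octets */
	uint16_t	 ascnt;		/* number of AS hops */
	u_char		 data[1];	/* encoded AS path follows */
};
#define ASPATH_HEADER_SIZE	(offsetof(struct aspath, data))

/* build a private aspath from wire data; no cache lookup any more */
struct aspath *
aspath_get(void *data, uint16_t len)
{
	struct aspath	*a;

	if ((a = malloc(ASPATH_HEADER_SIZE + len)) == NULL)
		return (NULL);
	a->len = len;
	a->ascnt = 0;		/* AS count / origin extraction elided here */
	a->source_as = 0;
	memcpy(a->data, data, len);
	return (a);
}

/* duplicating a path deep-copies instead of bumping a refcount */
struct aspath *
aspath_copy(struct aspath *o)
{
	struct aspath	*a;

	if ((a = malloc(ASPATH_HEADER_SIZE + o->len)) == NULL)
		return (NULL);
	memcpy(a, o, ASPATH_HEADER_SIZE + o->len);
	return (a);
}

/* teardown is an unconditional free; no unlink, no refcount */
void
aspath_put(struct aspath *a)
{
	free(a);
}

This trades some memory (identical AS paths are now stored once per rde_aspath instead of once globally) for not having to maintain and search the cache, which matches the rationale given at the top of the mail.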
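On the bgpctl side, the session.h/output.c/output_json.c changes add a "Pending" column to the Updates/Withdraws rows, fed from the RDE's up_nlricnt/up_wcnt counters, plus a matching "pending" object in the JSON neighbor stats. Roughly what that looks like (column spacing approximate, all values invented):

  Update statistics:
                  Sent       Received   Pending
  Prefixes            912345     876543
  Updates              81234      79876        12
  Withdraws             4321       3987         0
  End-of-Rib                1          1

and in the JSON output:

  "pending": {
    "updates": 12,
    "withdraws": 0
  }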