On Tue, May 04, 2021 at 09:55:32AM +0200, Claudio Jeker wrote:
> Noticed by the arouteserver author Pier Carlo Chiodi the new rde evaluate
> all feature has a bug when a 2nd best route is withdrawn. In that case
> that route got not withdrawn from the adj-rib-out.
> 
> The problem is in up_generate_updates() and the fact that 'rde evaluate
> all' calls up_generate_updates() with old = NULL. For 'rde evaluate all'
> the code traverses the prefix list in new and once the last element is hit
> (new == NULL) the withdraw should be triggered. But since old == NULL it
> would just return without doing the withdraw.
> 
> The fix is to remove the old == NULL check from the withdraw case but to
> do that the prefix address and prefixlen needs to be extracted before
> entering the selection loop.
> 
> So extract the address once at start based on new or old (whichever is
> set). In case new and old are NULL just return (like before), since there
> is nothing todo.
> 

There is another bug that causes this to not work properly. This time it
is rde_evaluate_all() which is returning 0 (as in default mode) even
though there are peers with 'rde evaluate all' set. The problem is that
the rde_eval_all flag is tracked in the wrong spot and so if you change
the flag after prefixes have loaded and then withdraw a prefix it will not
realize that a peer needs evaluate all and skip to process this withdraw.

The following diff (rde.c part) changes the tracking of rde_eval_all. It
resets the flag during reload but also sets rde_eval_all when a new peer
is added.

The diff also includes the rde_update.c change from before.
-- 
:wq Claudio

Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.520
diff -u -p -r1.520 rde.c
--- rde.c       6 May 2021 09:18:54 -0000       1.520
+++ rde.c       11 May 2021 08:28:58 -0000
@@ -113,6 +113,7 @@ volatile sig_atomic_t        rde_quit = 0;
 struct filter_head     *out_rules, *out_rules_tmp;
 struct rde_memstats     rdemem;
 int                     softreconfig;
+static int              rde_eval_all;
 
 extern struct rde_peer_head     peerlist;
 extern struct rde_peer         *peerself;
@@ -400,6 +402,9 @@ rde_dispatch_imsg_session(struct imsgbuf
                                fatalx("incorrect size of session request");
                        memcpy(&pconf, imsg.data, sizeof(pconf));
                        peer_add(imsg.hdr.peerid, &pconf);
+                       /* make sure rde_eval_all is on if needed. */
+                       if (pconf.flags & PEERFLAG_EVALUATE_ALL)
+                               rde_eval_all = 1;
                        break;
                case IMSG_NETWORK_ADD:
                        if (imsg.hdr.len - IMSG_HEADER_SIZE !=
@@ -2884,8 +2889,6 @@ rde_send_kroute(struct rib *rib, struct 
 /*
  * update specific functions
  */
-static int rde_eval_all;
-
 int
 rde_evaluate_all(void)
 {
@@ -2915,17 +2918,13 @@ rde_generate_updates(struct rib *rib, st
        else
                aid = old->pt->aid;
 
-       rde_eval_all = 0;
        LIST_FOREACH(peer, &peerlist, peer_l) {
                /* skip ourself */
                if (peer == peerself)
                        continue;
                if (peer->state != PEER_UP)
                        continue;
-               /* handle evaluate all, keep track if it is needed */
-               if (peer->flags & PEERFLAG_EVALUATE_ALL)
-                       rde_eval_all = 1;
-               else if (eval_all)
+               if ((peer->flags & PEERFLAG_EVALUATE_ALL) == 0 && eval_all)
                        /* skip default peers if the best path didn't change */
                        continue;
                /* skip peers using a different rib */
@@ -3287,9 +3286,12 @@ rde_reload_done(void)
 
        rde_filter_calc_skip_steps(out_rules);
 
+       /* make sure that rde_eval_all is correctly set after a config change */
+       rde_eval_all = 0;
+
        /* check if filter changed */
        LIST_FOREACH(peer, &peerlist, peer_l) {
-               if (peer->conf.id == 0)
+               if (peer->conf.id == 0) /* ignore peerself*/
                        continue;
                peer->reconf_out = 0;
                peer->reconf_rib = 0;
@@ -3319,6 +3321,8 @@ rde_reload_done(void)
                }
                peer->export_type = peer->conf.export_type;
                peer->flags = peer->conf.flags;
+               if (peer->flags & PEERFLAG_EVALUATE_ALL)
+                       rde_eval_all = 1;
 
                if (peer->reconf_rib) {
                        if (prefix_dump_new(peer, AID_UNSPEC,
@@ -3336,6 +3340,7 @@ rde_reload_done(void)
                        peer->reconf_out = 1;
                }
        }
+
        /* bring ribs in sync */
        for (rid = 0; rid < rib_size; rid++) {
                struct rib *rib = rib_byid(rid);
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.127
diff -u -p -r1.127 rde_update.c
--- rde_update.c        6 May 2021 09:18:54 -0000       1.127
+++ rde_update.c        7 May 2021 15:27:58 -0000
@@ -101,17 +101,23 @@ up_generate_updates(struct filter_head *
        struct filterstate      state;
        struct bgpd_addr        addr;
        int                     need_withdraw;
+       uint8_t                 prefixlen;
 
-again:
        if (new == NULL) {
                if (old == NULL)
-                       /* no prefix to withdraw */
+                       /* no prefix to update or withdraw */
                        return;
+               pt_getaddr(old->pt, &addr);
+               prefixlen = old->pt->prefixlen;
+       } else {
+               pt_getaddr(new->pt, &addr);
+               prefixlen = new->pt->prefixlen;
+       }
 
+again:
+       if (new == NULL) {
                /* withdraw prefix */
-               pt_getaddr(old->pt, &addr);
-               if (prefix_adjout_withdraw(peer, &addr,
-                   old->pt->prefixlen) == 1) {
+               if (prefix_adjout_withdraw(peer, &addr, prefixlen) == 1) {
                        peer->prefix_out_cnt--;
                        peer->up_wcnt++;
                }
@@ -143,10 +149,8 @@ again:
                rde_filterstate_prep(&state, prefix_aspath(new),
                    prefix_communities(new), prefix_nexthop(new),
                    prefix_nhflags(new));
-               pt_getaddr(new->pt, &addr);
                if (rde_filter(rules, peer, prefix_peer(new), &addr,
-                   new->pt->prefixlen, prefix_vstate(new), &state) ==
-                   ACTION_DENY) {
+                   prefixlen, prefix_vstate(new), &state) == ACTION_DENY) {
                        rde_filterstate_clean(&state);
                        if (peer->flags & PEERFLAG_EVALUATE_ALL)
                                new = LIST_NEXT(new, entry.list.rib);

Reply via email to