Currently we leave RTF_STATIC route entries in the table when the address they are attached to is removed from a system.
That's why ifas need to be refcounted and that's why we have *a lot* of checks in the stack to not use cached routes attached to such ifa. I'd like to simplify all of this by simply purging all the routes attached to an ifa being removed. This behavior is coherent with the fact that routes *need* an ifa to be inserted in the table. This makes the kernel simpler as it no longer try to find a new ifa when a route with a stale address is being used. Tests and oks welcome. Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.376 diff -u -p -r1.376 if.c --- net/if.c 12 Sep 2015 20:26:06 -0000 1.376 +++ net/if.c 13 Sep 2015 08:53:56 -0000 @@ -143,6 +143,7 @@ struct if_clone *if_clone_lookup(const c int if_group_egress_build(void); +void if_purgeaddrs(struct ifnet *); void if_link_state_change_task(void *); void if_input_process(void *); @@ -893,7 +894,6 @@ if_deactivate(struct ifnet *ifp) void if_detach(struct ifnet *ifp) { - struct ifaddr *ifa; struct ifg_list *ifg; struct domain *dp; int i, s; @@ -916,7 +916,6 @@ if_detach(struct ifnet *ifp) #if NBPFILTER > 0 bpfdetach(ifp); #endif - rt_if_remove(ifp); rti_delete(ifp); #if NETHER > 0 && defined(NFSCLIENT) if (ifp == revarp_ifp) @@ -944,16 +943,7 @@ if_detach(struct ifnet *ifp) if_free_sadl(ifp); /* We should not have any address left at this point. */ - if (!TAILQ_EMPTY(&ifp->if_addrlist)) { -#ifdef DIAGNOSTIC - printf("%s: address list non empty\n", ifp->if_xname); -#endif - while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) { - ifa_del(ifp, ifa); - ifa->ifa_ifp = NULL; - ifafree(ifa); - } - } + if_purgeaddrs(ifp); free(ifp->if_addrhooks, M_TEMP, 0); free(ifp->if_linkstatehooks, M_TEMP, 0); @@ -1570,6 +1560,28 @@ if_put(struct ifnet *ifp) atomic_dec_int(&ifp->if_refcnt); } + +/* + * Remove the remaining addresses from an interface, this should + * theorically not be necessary. + */ +void +if_purgeaddrs(struct ifnet *ifp) +{ + struct ifaddr *ifa; + + if (!TAILQ_EMPTY(&ifp->if_addrlist)) { +#ifdef DIAGNOSTIC + printf("%s: address list non empty\n", ifp->if_xname); +#endif + while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) { + ifa_del(ifp, ifa); + ifa->ifa_ifp = NULL; + ifafree(ifa); + } + } +} + /* * Interface ioctls. */ @@ -1912,7 +1924,6 @@ ifioctl(struct socket *so, u_long cmd, c */ if (up) if_down(ifp); - rt_if_remove(ifp); rti_delete(ifp); #ifdef MROUTING vif_delete(ifp); @@ -1921,6 +1932,10 @@ ifioctl(struct socket *so, u_long cmd, c in6_ifdetach(ifp); #endif in_ifdetach(ifp); + + /* We should not have any address left at this point. */ + if_purgeaddrs(ifp); + splx(s); } @@ -2509,6 +2524,7 @@ void ifa_del(struct ifnet *ifp, struct ifaddr *ifa) { TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list); + rt_ifa_purge(ifa); } void Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.239 diff -u -p -r1.239 route.c --- net/route.c 12 Sep 2015 20:50:17 -0000 1.239 +++ net/route.c 13 Sep 2015 09:00:43 -0000 @@ -152,7 +152,7 @@ void rt_timer_init(void); int rtable_alloc(void ***, u_int); int rtflushclone1(struct rtentry *, void *, u_int); void rtflushclone(unsigned int, struct rtentry *); -int rt_if_remove_rtdelete(struct rtentry *, void *, u_int); +int rt_ifa_remove_rtdelete(struct rtentry *, void *, unsigned int); struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *, u_int); @@ -850,22 +850,10 @@ rtrequest1(int req, struct rt_addrinfo * return (EINVAL); if ((rt->rt_flags & RTF_CLONING) == 0) return (EINVAL); - if (rt->rt_ifa->ifa_ifp) { - info->rti_ifa = rt->rt_ifa; - } else { - /* - * The address of the cloning route is not longer - * configured on an interface, but its descriptor - * is still there because of reference counting. - * - * Try to find a similar active address and use - * it for the cloned route. The cloning route - * will get the new address and interface later. - */ - info->rti_ifa = NULL; - info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; - } + if (rt->rt_ifa->ifa_ifp == NULL) + return (EAGAIN); + info->rti_ifa = rt->rt_ifa; info->rti_flags = rt->rt_flags & ~(RTF_CLONING | RTF_STATIC); info->rti_flags |= RTF_CLONED; info->rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; @@ -966,25 +954,6 @@ rtrequest1(int req, struct rt_addrinfo * rt->rt_ifp = ifa->ifa_ifp; if (req == RTM_RESOLVE) { /* - * If the ifa of the cloning route was stale, a - * successful lookup for an ifa with the same address - * has been made. Use this ifa also for the cloning - * route. - */ - if ((*ret_nrt)->rt_ifa->ifa_ifp == NULL) { - printf("rtrequest1 RTM_RESOLVE: wrong ifa (%p) " - "was (%p)\n", ifa, (*ret_nrt)->rt_ifa); - if ((*ret_nrt)->rt_ifa->ifa_rtrequest) - (*ret_nrt)->rt_ifa->ifa_rtrequest( - RTM_DELETE, *ret_nrt); - ifafree((*ret_nrt)->rt_ifa); - (*ret_nrt)->rt_ifa = ifa; - (*ret_nrt)->rt_ifp = ifa->ifa_ifp; - ifa->ifa_refcnt++; - if (ifa->ifa_rtrequest) - ifa->ifa_rtrequest(RTM_ADD, *ret_nrt); - } - /* * Copy both metrics and a back pointer to the cloned * route's parent. */ @@ -1657,18 +1626,18 @@ rtlabel_unref(u_int16_t id) } void -rt_if_remove(struct ifnet *ifp) +rt_ifa_purge(struct ifaddr *ifa) { int i; u_int tid; for (tid = 0; tid <= rtbl_id_max; tid++) { /* skip rtables that are not in the rdomain of the ifp */ - if (rtable_l2(tid) != ifp->if_rdomain) + if (rtable_l2(tid) != ifa->ifa_ifp->if_rdomain) continue; for (i = 1; i <= AF_MAX; i++) { - while (rtable_walk(tid, i, rt_if_remove_rtdelete, - ifp) == EAGAIN) + while (rtable_walk(tid, i, rt_ifa_remove_rtdelete, + ifa) == EAGAIN) ; /* nothing */ } } @@ -1681,11 +1650,11 @@ rt_if_remove(struct ifnet *ifp) * as long as EAGAIN is returned. */ int -rt_if_remove_rtdelete(struct rtentry *rt, void *vifp, u_int id) +rt_ifa_remove_rtdelete(struct rtentry *rt, void *vifa, unsigned int id) { - struct ifnet *ifp = vifp; + struct ifaddr *ifa = vifa; - if (rt->rt_ifp == ifp) { + if (rt->rt_ifa == ifa) { int cloning = (rt->rt_flags & RTF_CLONING); if (rtdeletemsg(rt, id) == 0 && cloning) Index: net/route.h =================================================================== RCS file: /cvs/src/sys/net/route.h,v retrieving revision 1.113 diff -u -p -r1.113 route.h --- net/route.h 11 Sep 2015 21:27:49 -0000 1.113 +++ net/route.h 13 Sep 2015 08:53:56 -0000 @@ -392,13 +392,14 @@ int rt_ifa_add(struct ifaddr *, int, st int rt_ifa_del(struct ifaddr *, int, struct sockaddr *); int rt_ifa_addlocal(struct ifaddr *); int rt_ifa_dellocal(struct ifaddr *); +void rt_ifa_purge(struct ifaddr *); + int rtioctl(u_long, caddr_t, struct proc *); void rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, struct rtentry **, u_int); int rtrequest1(int, struct rt_addrinfo *, u_int8_t, struct rtentry **, u_int); -void rt_if_remove(struct ifnet *); #ifndef SMALL_KERNEL void rt_if_track(struct ifnet *); int rt_if_linkstate_change(struct rtentry *, void *, u_int); Index: netinet6/in6.c =================================================================== RCS file: /cvs/src/sys/netinet6/in6.c,v retrieving revision 1.175 diff -u -p -r1.175 in6.c --- netinet6/in6.c 12 Sep 2015 20:50:17 -0000 1.175 +++ netinet6/in6.c 13 Sep 2015 08:53:56 -0000 @@ -968,7 +968,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s /* * release another refcnt for the link from in6_ifaddr. - * Note that we should decrement the refcnt at least once for all *BSD. */ ifafree(&ia6->ia_ifa); }