On 12/08/15(Wed) 17:03, Martin Pieuchot wrote: > I'm currently working on the routing table interface to make is safe > to use by multiple CPUs at the same time. The diff below is a big > step in this direction and I'd really appreciate if people could test > it with their usual network setup and report back.
Below is an updated version of the diff now that bluhm@ fixed the potential loop with RTF_GATEWAY routes. Note that regression tests will fail because we no longer call rtalloc(9) inside rt_setgate(). In other words we do not cache a next-hop route before using it. Ok? Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.268 diff -u -p -r1.268 route.c --- net/route.c 4 Nov 2015 10:13:55 -0000 1.268 +++ net/route.c 4 Nov 2015 11:15:00 -0000 @@ -151,6 +151,7 @@ void rt_timer_init(void); int rtflushclone1(struct rtentry *, void *, u_int); void rtflushclone(unsigned int, struct rtentry *); int rt_if_remove_rtdelete(struct rtentry *, void *, u_int); +struct rtentry *rt_match(struct sockaddr *, int, unsigned int); struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *, u_int); @@ -194,6 +195,12 @@ rtisvalid(struct rtentry *rt) if (rt == NULL) return (0); +#ifdef DIAGNOSTIC + if (ISSET(rt->rt_flags, RTF_GATEWAY) && (rt->rt_gwroute != NULL) && + ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY)) + panic("next hop must be directly reachable"); +#endif + if ((rt->rt_flags & RTF_UP) == 0) return (0); @@ -201,11 +208,27 @@ rtisvalid(struct rtentry *rt) if (rt->rt_ifa == NULL || rt->rt_ifa->ifa_ifp == NULL) return (0); + if (ISSET(rt->rt_flags, RTF_GATEWAY) && !rtisvalid(rt->rt_gwroute)) + return (0); + return (1); } +/* + * Do the actual lookup for rtalloc(9), do not use directly! + * + * Return the best matching entry for the destination ``dst''. + * + * "RT_RESOLVE" means that a corresponding L2 entry should + * be added to the routing table and resolved (via ARP or + * NDP), if it does not exist. + * + * "RT_REPORT" indicates that a message should be sent to + * userland if no matching route has been found or if an + * error occured while adding a L2 entry. + */ struct rtentry * -rtalloc(struct sockaddr *dst, int flags, unsigned int tableid) +rt_match(struct sockaddr *dst, int flags, unsigned int tableid) { struct rtentry *rt0, *rt = NULL; struct rt_addrinfo info; @@ -336,6 +359,76 @@ rtalloc_mpath(struct sockaddr *dst, uint } #endif /* SMALL_KERNEL */ +/* + * Look in the routing table for the best matching entry for + * ``dst''. + * + * If a route with a gateway is found and its next hop is no + * longer valid, try to cache it. + */ +struct rtentry * +rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid) +{ + struct rtentry *rt, *nhrt; + + rt = rt_match(dst, flags, rtableid); + + /* No match or route to host? We're done. */ + if (rt == NULL || !ISSET(rt->rt_flags, RTF_GATEWAY)) + return (rt); + + /* Nothing to do if the next hop is valid. */ + if (rtisvalid(rt->rt_gwroute)) + return (rt); + + rtfree(rt->rt_gwroute); + rt->rt_gwroute = NULL; + + /* + * If we cannot find a valid next hop, return the route + * with a gateway. + * + * XXX Some dragons hiding in the tree certainly depends on + * this behavior. But it is safe since rt_checkgate() wont + * allow us to us this route later on. + */ + nhrt = rt_match(rt->rt_gateway, flags | RT_RESOLVE, rtableid); + if (nhrt == NULL) + return (rt); + + /* + * Next hop must be reachable, this also prevents rtentry + * loops for example when rt->rt_gwroute points to rt. + */ + if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) { + rtfree(nhrt); + return (rt); + } + + /* + * Next hop entry must be UP and on the same interface. + */ + if (!ISSET(nhrt->rt_flags, RTF_UP) || nhrt->rt_ifidx != rt->rt_ifidx) { + rtfree(nhrt); + return (rt); + } + + /* + * If the MTU of next hop is 0, this will reset the MTU of the + * route to run PMTUD again from scratch. + */ + if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu)) + rt->rt_mtu = nhrt->rt_mtu; + + /* + * Do not return the cached next-hop route, rt_checkgate() will + * do the magic for us. + */ + rt->rt_gwroute = nhrt; + + return (rt); +} + void rtref(struct rtentry *rt) { @@ -499,7 +592,7 @@ create: rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &rtstat.rts_newgateway; - rt_setgate(rt, gateway, rdomain); + rt_setgate(rt, gateway); } } else error = EHOSTUNREACH; @@ -931,8 +1024,7 @@ rtrequest(int req, struct rt_addrinfo *i * the routing table because the radix MPATH code use * it to (re)order routes. */ - if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY], - tableid))) { + if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]))) { free(ndst, M_RTABLE, dlen); pool_put(&rtentry_pool, rt); return (error); @@ -985,7 +1077,7 @@ rtrequest(int req, struct rt_addrinfo *i } int -rt_setgate(struct rtentry *rt, struct sockaddr *gate, unsigned int tableid) +rt_setgate(struct rtentry *rt, struct sockaddr *gate) { int glen = ROUNDUP(gate->sa_len); struct sockaddr *sa; @@ -1003,22 +1095,7 @@ rt_setgate(struct rtentry *rt, struct so rtfree(rt->rt_gwroute); rt->rt_gwroute = NULL; } - if (rt->rt_flags & RTF_GATEWAY) { - /* XXX is this actually valid to cross tables here? */ - rt->rt_gwroute = rtalloc(gate, RT_REPORT|RT_RESOLVE, tableid); - /* - * If we switched gateways, grab the MTU from the new - * gateway route if the current MTU is 0 or greater - * than the MTU of gateway. - * Note that, if the MTU of gateway is 0, we will reset the - * MTU of the route to run PMTUD again from scratch. XXX - */ - if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) && - rt->rt_rmx.rmx_mtu && - rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) { - rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu; - } - } + return (0); } @@ -1033,26 +1110,8 @@ rt_checkgate(struct ifnet *ifp, struct r rt0 = rt; if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute && !(rt->rt_gwroute->rt_flags & RTF_UP)) { - rtfree(rt->rt_gwroute); - rt->rt_gwroute = NULL; - } - if (rt->rt_gwroute == NULL) { - rt->rt_gwroute = rtalloc(rt->rt_gateway, - RT_REPORT|RT_RESOLVE, rtableid); - if (rt->rt_gwroute == NULL) - return (EHOSTUNREACH); - } - /* - * Next hop must be reachable, this also prevents rtentry - * loops, for example when rt->rt_gwroute points to rt. - */ - if (((rt->rt_gwroute->rt_flags & (RTF_UP|RTF_GATEWAY)) != - RTF_UP) || (rt->rt_gwroute->rt_ifidx != ifp->if_index)) { - rtfree(rt->rt_gwroute); - rt->rt_gwroute = NULL; + if (rt->rt_gwroute == NULL) return (EHOSTUNREACH); - } rt = rt->rt_gwroute; } Index: net/route.h =================================================================== RCS file: /cvs/src/sys/net/route.h,v retrieving revision 1.118 diff -u -p -r1.118 route.h --- net/route.h 30 Oct 2015 09:39:42 -0000 1.118 +++ net/route.h 4 Nov 2015 11:00:15 -0000 @@ -119,6 +119,8 @@ struct rtentry { }; #define rt_use rt_rmx.rmx_pksent #define rt_expire rt_rmx.rmx_expire +#define rt_locks rt_rmx.rmx_locks +#define rt_mtu rt_rmx.rmx_mtu #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ @@ -358,7 +360,7 @@ void rt_maskedcopy(struct sockaddr *, void rt_sendmsg(struct rtentry *, int, u_int); void rt_sendaddrmsg(struct rtentry *, int); void rt_missmsg(int, struct rt_addrinfo *, int, u_int, int, u_int); -int rt_setgate(struct rtentry *, struct sockaddr *, unsigned int); +int rt_setgate(struct rtentry *, struct sockaddr *); int rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *, unsigned int, struct rtentry **); void rt_setmetrics(u_long, struct rt_metrics *, struct rt_kmetrics *); Index: net/rtsock.c =================================================================== RCS file: /cvs/src/sys/net/rtsock.c,v retrieving revision 1.181 diff -u -p -r1.181 rtsock.c --- net/rtsock.c 2 Nov 2015 14:40:09 -0000 1.181 +++ net/rtsock.c 4 Nov 2015 11:01:17 -0000 @@ -745,9 +745,8 @@ report: goto flush; ifa = info.rti_ifa; } - if (info.rti_info[RTAX_GATEWAY] != NULL && - (error = rt_setgate(rt, info.rti_info[RTAX_GATEWAY], - tableid))) + if (info.rti_info[RTAX_GATEWAY] != NULL && (error = + rt_setgate(rt, info.rti_info[RTAX_GATEWAY]))) goto flush; if (ifa) { if (rt->rt_ifa != ifa) { Index: net/if_spppsubr.c =================================================================== RCS file: /cvs/src/sys/net/if_spppsubr.c,v retrieving revision 1.144 diff -u -p -r1.144 if_spppsubr.c --- net/if_spppsubr.c 2 Nov 2015 11:19:30 -0000 1.144 +++ net/if_spppsubr.c 4 Nov 2015 11:03:24 -0000 @@ -4244,10 +4244,9 @@ sppp_update_gw_walker(struct rtentry *rt if (rt->rt_ifidx == ifp->if_index) { if (rt->rt_ifa->ifa_dstaddr->sa_family != rt->rt_gateway->sa_family || - (rt->rt_flags & RTF_GATEWAY) == 0) + !ISSET(rt->rt_flags, RTF_GATEWAY)) return (0); /* do not modify non-gateway routes */ - bcopy(rt->rt_ifa->ifa_dstaddr, rt->rt_gateway, - rt->rt_ifa->ifa_dstaddr->sa_len); + rt_setgate(rt, rt->rt_ifa->ifa_dstaddr); } return (0); }