On 31/08/15(Mon) 11:52, Martin Pieuchot wrote: > On 25/08/15(Tue) 12:27, Martin Pieuchot wrote: > > On 12/08/15(Wed) 17:03, Martin Pieuchot wrote: > > > I'm currently working on the routing table interface to make is safe > > > to use by multiple CPUs at the same time. The diff below is a big > > > step in this direction and I'd really appreciate if people could test > > > it with their usual network setup and report back. > > > > Updated version to match recent changes. I'm still looking for test > > reports and reviews. > > Thanks to all the testers. Here's a third version that should fix a > regression reported by phessler@. The idea is to use rtvalid(9) to > check early if the gateway route is still valid or not. If it isn't > then call rtalloc(9) again to perform the ARP/NDP resolution.
Smaller diff now that rtisvalid(9) is in! > Tests & reviews are still welcome! Note that this diff plugs a rt leak for free. Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.228 diff -u -p -r1.228 route.c --- net/route.c 1 Sep 2015 12:50:03 -0000 1.228 +++ net/route.c 1 Sep 2015 14:41:36 -0000 @@ -153,6 +153,7 @@ int rtable_alloc(void ***, u_int); int rtflushclone1(struct rtentry *, void *, u_int); void rtflushclone(unsigned int, struct rtentry *); int rt_if_remove_rtdelete(struct rtentry *, void *, u_int); +struct rtentry *rt_match(struct sockaddr *, int, unsigned int); struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *, u_int); @@ -300,11 +301,8 @@ rtable_exists(u_int id) /* verify table return (1); } -/* - * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise. - */ -int -rtisvalid(struct rtentry *rt) +static inline int +rt_isvalid(struct rtentry *rt) { if (rt == NULL) return (0); @@ -319,19 +317,48 @@ rtisvalid(struct rtentry *rt) return (1); } +/* + * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise. + */ +int +rtisvalid(struct rtentry *rt) +{ + if (!rt_isvalid(rt)) + return (0); + + /* Next hop must also be valid. */ + if ((rt->rt_flags & RTF_GATEWAY) && !rt_isvalid(rt->rt_gwroute)) + return (0); + + return (1); +} + +/* + * Do the actual lookup for rtalloc(9), do not use directly! + * + * Return the best matching entry for the destination ``dst''. + * + * "RT_RESOLVE" means that a corresponding L2 entry should + * be added to the routing table and resolved (via ARP or + * NDP), if it does not exist. + * + * "RT_REPORT" indicates that a message should be sent to + * userland if no matching route has been found or if an + * error occured while adding a L2 entry. + */ struct rtentry * -rtalloc(struct sockaddr *dst, int flags, unsigned int tableid) +rt_match(struct sockaddr *dst, int flags, unsigned int tableid) { struct rtentry *rt; struct rtentry *newrt = NULL; struct rt_addrinfo info; - int s, error = 0, msgtype = RTM_MISS; + int s, error = 0; - s = splsoftnet(); bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; + s = splsoftnet(); rt = rtable_match(tableid, dst); if (rt != NULL) { newrt = rt; @@ -344,10 +371,6 @@ rtalloc(struct sockaddr *dst, int flags, goto miss; } rt = newrt; - if (rt->rt_flags & RTF_XRESOLVE) { - msgtype = RTM_RESOLVE; - goto miss; - } /* Inform listeners of the new route */ rt_sendmsg(rt, RTM_ADD, tableid); } else @@ -355,11 +378,8 @@ rtalloc(struct sockaddr *dst, int flags, } else { rtstat.rts_unreach++; miss: - if (ISSET(flags, RT_REPORT)) { - bzero((caddr_t)&info, sizeof(info)); - info.rti_info[RTAX_DST] = dst; - rt_missmsg(msgtype, &info, 0, NULL, error, tableid); - } + if (ISSET(flags, RT_REPORT)) + rt_missmsg(RTM_MISS, &info, 0, NULL, error, tableid); } splx(s); return (newrt); @@ -393,6 +413,75 @@ rtalloc_mpath(struct sockaddr *dst, uint } #endif /* SMALL_KERNEL */ +/* + * Look in the routing table for the best matching entry for + * ``dst''. + * + * If a route with a gateway is found and its next hop is no + * longer valid, try to cache it. + */ +struct rtentry * +rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid) +{ + struct rtentry *rt, *nhrt; + + rt = rt_match(dst, flags, rtableid); + + /* No match or route to host? We're done. */ + if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) == 0) + return (rt); + + nhrt = rt->rt_gwroute; + + /* Nothing to do if the next hop is valid. */ + if (nhrt != NULL && (nhrt->rt_flags & RTF_UP)) + return (rt); + + rtfree(rt->rt_gwroute); + rt->rt_gwroute = NULL; + + /* + * If we cannot find a valid next hop, return the route + * with a gateway. + * + * Some dragons hiding in the tree certainly depends on + * this behavior. + */ + nhrt = rt_match(rt->rt_gateway, flags | RT_RESOLVE, rtableid); + if (nhrt == NULL) + return (rt); + + /* + * Next hop must be reachable, this also prevents rtentry + * loops for example when rt->rt_gwroute points to rt. + */ + if ((nhrt->rt_flags & (RTF_UP|RTF_CLONING|RTF_GATEWAY)) != RTF_UP) { + rtfree(nhrt); + return (rt); + } + + /* + * Next hop entry MUST be on the same interface. + * + * This check is needed as long as we support stall + * ``ifa'' pointers. + */ + if (nhrt->rt_ifp != rt->rt_ifp) { + rtfree(nhrt); + return (rt); + } + + /* + * If the MTU of next hop is 0, this will reset the MTU of the + * route to run PMTUD again from scratch. + */ + if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu)) + rt->rt_mtu = nhrt->rt_mtu; + + rt->rt_gwroute = nhrt; + return (rt); +} + void rtfree(struct rtentry *rt) { @@ -546,7 +635,7 @@ create: rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &rtstat.rts_newgateway; - rt_setgate(rt, gateway, rdomain); + rt_setgate(rt, gateway); } } else error = EHOSTUNREACH; @@ -995,8 +1084,7 @@ rtrequest1(int req, struct rt_addrinfo * * the routing table because the radix MPATH code use * it to (re)order routes. */ - if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY], - tableid))) { + if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]))) { free(ndst, M_RTABLE, dlen); pool_put(&rtentry_pool, rt); return (error); @@ -1047,7 +1135,7 @@ rtrequest1(int req, struct rt_addrinfo * } int -rt_setgate(struct rtentry *rt, struct sockaddr *gate, unsigned int tableid) +rt_setgate(struct rtentry *rt, struct sockaddr *gate) { int glen = ROUNDUP(gate->sa_len); struct sockaddr *sa; @@ -1065,22 +1153,7 @@ rt_setgate(struct rtentry *rt, struct so rtfree(rt->rt_gwroute); rt->rt_gwroute = NULL; } - if (rt->rt_flags & RTF_GATEWAY) { - /* XXX is this actually valid to cross tables here? */ - rt->rt_gwroute = rtalloc(gate, RT_REPORT|RT_RESOLVE, tableid); - /* - * If we switched gateways, grab the MTU from the new - * gateway route if the current MTU is 0 or greater - * than the MTU of gateway. - * Note that, if the MTU of gateway is 0, we will reset the - * MTU of the route to run PMTUD again from scratch. XXX - */ - if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) && - rt->rt_rmx.rmx_mtu && - rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) { - rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu; - } - } + return (0); } @@ -1092,28 +1165,21 @@ rt_checkgate(struct ifnet *ifp, struct r KASSERT(rt != NULL); - if ((rt->rt_flags & RTF_UP) == 0) { - rt = rtalloc(dst, RT_REPORT|RT_RESOLVE, rtableid); - if (rt == NULL) - return (EHOSTUNREACH); - rt->rt_refcnt--; - if (rt->rt_ifp != ifp) - return (EHOSTUNREACH); - } + if ((rt->rt_flags & RTF_UP) == 0) + return (EHOSTUNREACH); rt0 = rt; if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute && !(rt->rt_gwroute->rt_flags & RTF_UP)) { + if (rt->rt_gwroute == NULL) + return (EHOSTUNREACH); + + if ((rt->rt_gwroute->rt_flags & RTF_UP) == 0) { rtfree(rt->rt_gwroute); rt->rt_gwroute = NULL; + return (EHOSTUNREACH); } - if (rt->rt_gwroute == NULL) { - rt->rt_gwroute = rtalloc(rt->rt_gateway, - RT_REPORT|RT_RESOLVE, rtableid); - if (rt->rt_gwroute == NULL) - return (EHOSTUNREACH); - } + /* * Next hop must be reachable, this also prevents rtentry * loops, for example when rt->rt_gwroute points to rt. Index: net/route.h =================================================================== RCS file: /cvs/src/sys/net/route.h,v retrieving revision 1.111 diff -u -p -r1.111 route.h --- net/route.h 1 Sep 2015 12:50:03 -0000 1.111 +++ net/route.h 1 Sep 2015 14:34:50 -0000 @@ -118,6 +118,8 @@ struct rtentry { }; #define rt_use rt_rmx.rmx_pksent #define rt_expire rt_rmx.rmx_expire +#define rt_locks rt_rmx.rmx_locks +#define rt_mtu rt_rmx.rmx_mtu #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ @@ -361,7 +363,7 @@ void rt_sendmsg(struct rtentry *, int, void rt_sendaddrmsg(struct rtentry *, int); void rt_missmsg(int, struct rt_addrinfo *, int, struct ifnet *, int, u_int); -int rt_setgate(struct rtentry *, struct sockaddr *, unsigned int); +int rt_setgate(struct rtentry *, struct sockaddr *); int rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *, unsigned int, struct rtentry **); void rt_setmetrics(u_long, struct rt_metrics *, struct rt_kmetrics *); Index: net/rtsock.c =================================================================== RCS file: /cvs/src/sys/net/rtsock.c,v retrieving revision 1.169 diff -u -p -r1.169 rtsock.c --- net/rtsock.c 24 Aug 2015 22:11:33 -0000 1.169 +++ net/rtsock.c 1 Sep 2015 14:34:50 -0000 @@ -744,9 +744,8 @@ report: info.rti_info[RTAX_GATEWAY]->sa_len)) { newgate = 1; } - if (info.rti_info[RTAX_GATEWAY] != NULL && - (error = rt_setgate(rt, info.rti_info[RTAX_GATEWAY], - tableid))) + if (info.rti_info[RTAX_GATEWAY] != NULL && (error = + rt_setgate(rt, info.rti_info[RTAX_GATEWAY]))) goto flush; /* * new gateway could require new ifaddr, ifp;