On 23/01/21(Sat) 12:22, Denis Fondras wrote:
> Le Sat, Jan 09, 2021 at 06:50:50PM +0100, Denis Fondras a écrit :
> > This diff place the user-set source address outside of struct art_root and
> > make
> > the code more readable (to me).
> >
> > Based on a concept by mpi@
Comments below.
> > Index: net/rtsock.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/rtsock.c,v
> > retrieving revision 1.304
> > diff -u -p -r1.304 rtsock.c
> > --- net/rtsock.c 7 Nov 2020 09:51:40 -0000 1.304
> > +++ net/rtsock.c 9 Jan 2021 16:04:02 -0000
> > @@ -138,7 +138,8 @@ int sysctl_iflist(int, struct walkarg
> > int sysctl_ifnames(struct walkarg *);
> > int sysctl_rtable_rtstat(void *, size_t *, void *);
> >
> > -int rt_setsource(unsigned int, struct sockaddr *);
> > +int rt_sourceset(struct rtentry *, unsigned int);
> > +struct rtentry *rt_get_rt(int, unsigned int);
I don't understand what's the use for rt_get_rt(), the name of the
function isn't helping either, more on that below.
> > /*
> > * Locks used to protect struct members
> > @@ -170,6 +171,14 @@ struct rtptable {
> > struct pool rtpcb_pool;
> > struct rtptable rtptable;
> >
> > +struct rt_srcaddr {
> > + LIST_ENTRY(rt_srcaddr) rts_next;
> > + unsigned int rts_rtableid;
> > + struct rtentry *rts_rt;
> > +};
> > +
> > +LIST_HEAD(, rt_srcaddr) srcaddr_h = LIST_HEAD_INITIALIZER(srcaddr_h);
> > +
Could you document which lock are protecting those fields? Can you
assert such lock is held when accessing them? Could you also document
what this data structure is for?
> > /*
> > * These flags and timeout are used for indicating to userland (via a
> > * RTM_DESYNC msg) when the route socket has overflowed and messages
> > @@ -664,10 +673,7 @@ rtm_report(struct rtentry *rt, u_char ty
> > ifp = if_get(rt->rt_ifidx);
> > if (ifp != NULL) {
> > info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
> > - info.rti_info[RTAX_IFA] =
> > - rtable_getsource(tableid,
> > info.rti_info[RTAX_DST]->sa_family);
> > - if (info.rti_info[RTAX_IFA] == NULL)
> > - info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
> > + info.rti_info[RTAX_IFA] = rt_get_ifa(rt, tableid)->ifa_addr;
> > if (ifp->if_flags & IFF_POINTOPOINT)
With the introduction of rt_get_ifa() is there any place left in the
network stack where `rt->rt_ifa' is accessed directly? Is there a
reason? Should we explain when using the function is necessary in a
comment on top of it?
> > info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
> > }
> > @@ -860,10 +866,28 @@ route_output(struct mbuf *m, struct sock
> > if (info.rti_info[RTAX_IFA] == NULL) {
> > error = EINVAL;
> > goto fail;
> > + } else if ((info.rti_info[RTAX_IFA]->sa_family == AF_INET6 &&
> > + IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)
> > + info.rti_info[RTAX_IFA])->sin6_addr)) ||
> > + (info.rti_info[RTAX_IFA]->sa_family == AF_INET &&
> > + ((struct sockaddr_in *)
> > + info.rti_info[RTAX_IFA])->sin_addr.s_addr == 0)) {
Do I understand correctly that the default route is used as a magic
value to clear any preferred source routing entry? Isn't it possible to
retrieve this route via rtalloc(9) instead of rt_get_rt()?
Why do we need a `rt' at all? If it's because rt_sourceclear() expects
one, can we change this expectation?
> > +
> > rt_sourceclear(rt_get_rt(info.rti_info[RTAX_IFA]->sa_family,
> > + tableid), tableid);
> > + rtfree(rt);
> > + rt = NULL;
> > + } else {
> > + rt = rtalloc(info.rti_info[RTAX_IFA], 0, tableid);
> > + if (rt == NULL || !ISSET(rt->rt_flags, RTF_LOCAL)) {
> > + error = EINVAL;
> > + goto fail;
> > + }
> > + NET_LOCK();
> > + error = rt_sourceset(rt, tableid);
> > + NET_UNLOCK();
Could you push the NET_LOCK() down and do the allocation before grabbing
it? We should refrain from calling malloc(9) with such lock held.
Maybe the NET_LOCK() is not the lock we want here, but this can be
changed later.
> > + if (error != 0)
> > + goto fail;
> > }
> > - if ((error =
> > - rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0)
> > - goto fail;
> > } else {
> > error = rtm_output(rtm, &rt, &info, prio, tableid);
> > if (!error) {
> > @@ -873,9 +897,9 @@ route_output(struct mbuf *m, struct sock
> > rtm = rtm_report(rt, type, seq, tableid);
> > len = rtm->rtm_msglen;
> > }
> > + rtfree(rt);
> > }
> >
> > - rtfree(rt);
> > if (error) {
> > rtm->rtm_errno = error;
> > } else {
> > @@ -1687,10 +1711,7 @@ rtm_send(struct rtentry *rt, int cmd, in
> > ifp = if_get(rt->rt_ifidx);
> > if (ifp != NULL) {
> > info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
> > - info.rti_info[RTAX_IFA] =
> > - rtable_getsource(rtableid,
> > info.rti_info[RTAX_DST]->sa_family);
> > - if (info.rti_info[RTAX_IFA] == NULL)
> > - info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
> > + info.rti_info[RTAX_IFA] = rt_get_ifa(rt, rtableid)->ifa_addr;
> > }
> >
> > rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
> > @@ -1928,10 +1949,7 @@ sysctl_dumpentry(struct rtentry *rt, voi
> > ifp = if_get(rt->rt_ifidx);
> > if (ifp != NULL) {
> > info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
> > - info.rti_info[RTAX_IFA] =
> > - rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
> > - if (info.rti_info[RTAX_IFA] == NULL)
> > - info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
> > + info.rti_info[RTAX_IFA] = rt_get_ifa(rt, id)->ifa_addr;
> > if (ifp->if_flags & IFF_POINTOPOINT)
> > info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
> > }
> > @@ -2067,33 +2085,30 @@ sysctl_ifnames(struct walkarg *w)
> > }
> >
> > int
> > -sysctl_source(int af, u_int tableid, struct walkarg *w)
> > +sysctl_source(int af, struct walkarg *w)
> > {
> > - struct sockaddr *sa;
> > - int size, error = 0;
> > + struct rt_srcaddr *rtsa = NULL;
> > + unsigned int tableid = w->w_arg;
> > + int error = 0;
> >
> > - sa = rtable_getsource(tableid, af);
> > - if (sa) {
> > - switch (sa->sa_family) {
> > - case AF_INET:
> > - size = sizeof(struct sockaddr_in);
> > - break;
> > -#ifdef INET6
> > - case AF_INET6:
> > - size = sizeof(struct sockaddr_in6);
> > - break;
> > -#endif
> > - default:
> > - return (0);
> > - }
> > - w->w_needed += size;
> > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > + if (rtsa->rts_rtableid != tableid)
> > + continue;
> > + if (af != 0 && rtsa->rts_rt->rt_dest->sa_family != af)
> > + continue;
> > +
> > + w->w_needed += rtsa->rts_rt->rt_dest->sa_len;
> > if (w->w_where && w->w_needed <= 0) {
> > - if ((error = copyout(sa, w->w_where, size)))
> > - return (error);
> > - w->w_where += size;
> > + error = copyout(rtsa->rts_rt->rt_ifa->ifa_addr,
> > + w->w_where, rtsa->rts_rt->rt_dest->sa_len);
> > + if (error == EAFNOSUPPORT)
> > + error = 0;
> > + if (error)
> > + break;
> > + w->w_where += rtsa->rts_rt->rt_dest->sa_len;
> > }
> > }
> > - return (0);
> > + return (error);
> > }
> >
> > int
> > @@ -2171,16 +2186,7 @@ sysctl_rtable(int *name, u_int namelen,
> > if (!rtable_exists(tableid))
> > return (ENOENT);
> > NET_LOCK();
> > - for (i = 1; i <= AF_MAX; i++) {
> > - if (af != 0 && af != i)
> > - continue;
> > -
> > - error = sysctl_source(i, tableid, &w);
> > - if (error == EAFNOSUPPORT)
> > - error = 0;
> > - if (error)
> > - break;
> > - }
> > + error = sysctl_source(af, &w);
> > NET_UNLOCK();
Same here could you push the lock inside the function?
> > break;
> > }
> > @@ -2314,40 +2320,96 @@ rtm_validate_proposal(struct rt_addrinfo
> > }
> >
> > int
> > -rt_setsource(unsigned int rtableid, struct sockaddr *src)
> > +rt_sourceset(struct rtentry *rt, unsigned int rtableid)
> > {
> > - struct ifaddr *ifa;
> > - /*
> > - * If source address is 0.0.0.0 or ::
> > - * use automatic source selection
> > - */
> > - switch(src->sa_family) {
> > - case AF_INET:
> > - if(satosin(src)->sin_addr.s_addr == INADDR_ANY) {
> > - rtable_setsource(rtableid, AF_INET, NULL);
> > - return (0);
> > - }
> > - break;
> > -#ifdef INET6
> > - case AF_INET6:
> > - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
> > - rtable_setsource(rtableid, AF_INET6, NULL);
> > - return (0);
> > + struct rt_srcaddr *rtsa = NULL;
> > +
> > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > + if (rtsa->rts_rtableid == rtableid &&
> > + rtsa->rts_rt->rt_dest->sa_family == rt->rt_dest->sa_family)
> > + break;
> > + }
> > + if (rtsa == NULL) {
> > + if ((rtsa = malloc(sizeof(struct rt_srcaddr), M_IFADDR,
> > + M_NOWAIT|M_ZERO)) == NULL)
> > + return (ENOMEM);
> > + rtsa->rts_rtableid = rtableid;
> > + rtsa->rts_rt = rt;
> > + LIST_INSERT_HEAD(&srcaddr_h, rtsa, rts_next);
> > + } else {
> > + /* Update existing entry */
> > + rtfree(rtsa->rts_rt);
> > + rtsa->rts_rt = rt;
> > + }
> > +
> > + return (0);
> > +}
> > +
> > +/*
> > + * Return the 'ifa' associated to a given 'rt' unless a preferred
> > + * source address has been specified for the same routing table.
> > + */
> > +struct rtentry *
> > +rt_get_rt(int af, unsigned int rtableid)
> > +{
> > + struct rt_srcaddr *rtsa = NULL;
> > +
> > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > + if (rtsa->rts_rtableid == rtableid &&
> > + rtsa->rts_rt->rt_dest->sa_family == af)
> > + break;
> > + }
> > + if (rtsa)
> > + return (rtsa->rts_rt);
> > +
> > + return (NULL);
> > +}
Here we should document that it is safe to dereference `ifa' as long as
the caller owns a valid reference to `rt'.
> > +struct ifaddr *
> > +rt_get_ifa(struct rtentry *rt, unsigned int rtableid)
> > +{
> > + struct rt_srcaddr *rtsa = NULL;
> > + struct ifnet *ifp = NULL;
> > +
> > + if (ISSET(rt->rt_flags, RTF_HOST|RTF_LLINFO))
> > + return (rt->rt_ifa);
> > +
Please add an assert for the lock here, just before iterating on a data
structure
that requires it.
> > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > + if (rtsa->rts_rtableid == rtableid &&
> > + rtsa->rts_rt->rt_dest->sa_family == rt->rt_dest->sa_family)
> > + break;
> > + }
> > + if (rtsa != NULL && rtisvalid(rtsa->rts_rt)) {
> > + struct rtentry *nrt = rtsa->rts_rt;
> > +
> > + ifp = if_get(nrt->rt_ifidx);
> > + if (ifp != NULL) {
> > + if (ISSET(ifp->if_flags, IFF_UP)) {
> > + if_put(ifp);
> > + return (nrt->rt_ifa);
> > + }
I don't understand this check. The `ifp' can be brought down at
anytime. This seems like a workaround for something and should be at
least documented.
> > }
> > - break;
> > -#endif
> > - default:
> > - return (EAFNOSUPPORT);
> > }
> > + return (rt->rt_ifa);
> > +}
> >
> > - /*
> > - * Check if source address is assigned to an interface in the
> > - * same rdomain
> > - */
> > - if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL)
> > - return (EINVAL);
> > +void
> > +rt_sourceclear(struct rtentry *rt, unsigned int rtableid)
> > +{
> > + struct rt_srcaddr *rtsa = NULL;
> >
> > - return (rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr));
> > + if (rt == NULL)
> > + return;
> > +
> > + LIST_FOREACH(rtsa, &srcaddr_h, rts_next) {
> > + if (rtsa->rts_rtableid != rtableid)
> > + continue;
> > + if (rtsa->rts_rt == rt) {
> > + LIST_REMOVE(rtsa, rts_next);
I'd suggest using the 'break' idiom and not calling a LIST_REMOVE()
inside a list which isn't a *_SAFE() variant, that is IMHO less
confusing.
> > + free(rtsa, M_IFADDR, sizeof(struct rt_srcaddr));
> > + break;
> > + }
> > + }
> > }
> >
> > /*
> > Index: netinet/in_pcb.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/in_pcb.c,v
> > retrieving revision 1.252
> > diff -u -p -r1.252 in_pcb.c
> > --- netinet/in_pcb.c 7 Nov 2020 09:51:40 -0000 1.252
> > +++ netinet/in_pcb.c 9 Jan 2021 16:04:02 -0000
> > @@ -887,7 +887,6 @@ in_pcbselsrc(struct in_addr **insrc, str
> > struct route *ro = &inp->inp_route;
> > struct in_addr *laddr = &inp->inp_laddr;
> > u_int rtableid = inp->inp_rtableid;
> > - struct sockaddr *ip4_source = NULL;
> >
> > struct sockaddr_in *sin2;
> > struct in_ifaddr *ia = NULL;
> > @@ -951,30 +950,11 @@ in_pcbselsrc(struct in_addr **insrc, str
> > }
> >
> > /*
> > - * If we found a route, use the address
> > - * corresponding to the outgoing interface.
> > + * If we found a route, use the address corresponding to the
> > + * outgoing interface or the preferred source address if set.
> > */
> > if (ro->ro_rt != NULL)
> > - ia = ifatoia(ro->ro_rt->rt_ifa);
> > -
> > - /*
> > - * Use preferred source address if :
> > - * - destination is not onlink
> > - * - preferred source addresss is set
> > - * - output interface is UP
> > - */
> > - if (ro->ro_rt && !(ro->ro_rt->rt_flags & RTF_LLINFO) &&
> > - !(ro->ro_rt->rt_flags & RTF_HOST)) {
> > - ip4_source = rtable_getsource(rtableid, AF_INET);
> > - if (ip4_source != NULL) {
> > - struct ifaddr *ifa;
> > - if ((ifa = ifa_ifwithaddr(ip4_source, rtableid)) !=
> > - NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) {
> > - *insrc = &satosin(ip4_source)->sin_addr;
> > - return (0);
> > - }
> > - }
> > - }
> > + ia = ifatoia(rt_get_ifa(ro->ro_rt, rtableid));
> >
> > if (ia == NULL)
> > return (EADDRNOTAVAIL);
> > Index: netinet/ip_icmp.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/ip_icmp.c,v
> > retrieving revision 1.184
> > diff -u -p -r1.184 ip_icmp.c
> > --- netinet/ip_icmp.c 20 Dec 2020 21:15:47 -0000 1.184
> > +++ netinet/ip_icmp.c 9 Jan 2021 16:04:02 -0000
> > @@ -745,7 +745,7 @@ icmp_reflect(struct mbuf *m, struct mbuf
> > return (EHOSTUNREACH);
> > }
> >
> > - ia = ifatoia(rt->rt_ifa);
> > + ia = ifatoia(rt_get_ifa(rt, rtableid));
> > }
> >
> > ip->ip_dst = ip->ip_src;
> > Index: netinet6/icmp6.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet6/icmp6.c,v
> > retrieving revision 1.233
> > diff -u -p -r1.233 icmp6.c
> > --- netinet6/icmp6.c 28 Oct 2020 17:27:35 -0000 1.233
> > +++ netinet6/icmp6.c 9 Jan 2021 16:04:02 -0000
> > @@ -1163,7 +1163,10 @@ icmp6_reflect(struct mbuf **mp, size_t o
> > rtfree(rt);
> > goto bad;
> > }
> > - ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t, rtableid);
> > + ia6 = ifatoia6(rt_get_ifa(rt, rtableid));
> > + if (ia6 == NULL)
> > + ia6 = in6_ifawithscope(rt->rt_ifa->ifa_ifp, &t,
> > + rtableid);
> > if (ia6 != NULL)
> > src = &ia6->ia_addr.sin6_addr;
> > if (src == NULL)
> > Index: netinet6/in6_src.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet6/in6_src.c,v
> > retrieving revision 1.84
> > diff -u -p -r1.84 in6_src.c
> > --- netinet6/in6_src.c 7 Nov 2020 09:51:40 -0000 1.84
> > +++ netinet6/in6_src.c 9 Jan 2021 16:04:02 -0000
> > @@ -100,7 +100,6 @@ in6_pcbselsrc(struct in6_addr **in6src,
> > struct in6_addr *laddr = &inp->inp_laddr6;
> > u_int rtableid = inp->inp_rtableid;
> > struct ifnet *ifp = NULL;
> > - struct sockaddr *ip6_source = NULL;
> > struct in6_addr *dst;
> > struct in6_ifaddr *ia6 = NULL;
> > struct in6_pktinfo *pi = NULL;
> > @@ -208,32 +207,16 @@ in6_pcbselsrc(struct in6_addr **in6src,
> > */
> >
> > if (ro->ro_rt) {
> > - ifp = if_get(ro->ro_rt->rt_ifidx);
> > - if (ifp != NULL) {
> > - ia6 = in6_ifawithscope(ifp, dst, rtableid);
> > - if_put(ifp);
> > + ia6 = ifatoia6(rt_get_ifa(ro->ro_rt, rtableid));
> > + if (ia6 == NULL) {
> > + ifp = if_get(ro->ro_rt->rt_ifidx);
> > + if (ifp != NULL) {
> > + ia6 = in6_ifawithscope(ifp, dst, rtableid);
> > + if_put(ifp);
> > + }
> > }
> > if (ia6 == NULL) /* xxx scope error ?*/
> > ia6 = ifatoia6(ro->ro_rt->rt_ifa);
^^^^^^^^^^^^^^^^^
Shouldn't rt_get_ifa() be used here as well? Shouldn't the scope be
checked first?
I'd love to see this code unified with the logic in icmp6_reflect() bot
are different currently.
> > - }
> > -
> > - /*
> > - * Use preferred source address if :
> > - * - destination is not onlink
> > - * - preferred source addresss is set
> > - * - output interface is UP
> > - */
> > - if (ro->ro_rt && !(ro->ro_rt->rt_flags & RTF_LLINFO) &&
> > - !(ro->ro_rt->rt_flags & RTF_HOST)) {
> > - ip6_source = rtable_getsource(rtableid, AF_INET6);
> > - if (ip6_source != NULL) {
> > - struct ifaddr *ifa;
> > - if ((ifa = ifa_ifwithaddr(ip6_source, rtableid)) !=
> > - NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) {
> > - *in6src = &satosin6(ip6_source)->sin6_addr;
> > - return (0);
> > - }
> > - }
> > }
> >
> > if (ia6 == NULL)