On Tue, May 30, 2017 at 01:59:23PM +0200, Claudio Jeker wrote: > This is a step I need to do to make progress on the PF_KEY cleanup I'm > doing. Both PF_ROUTE and PF_KEY need to start to take care of their own > PCB list and so move the LIST_ENTRY out of rawcb into routecb. > This allows me to do the same in PF_KEY which will be sent as the next > diff. >
Cleaned up and updated diff. Now including both rtsock.c and the pfkey files. I think I fixed all things reported by mpi@. Especially the socket handling on create got reordered after better understanding what order is best. In short as long as the socket() call is not done the file descriptor for it is invalid and so it is the safest to do all the so fiddling first and delay the LIST_INSERT to the very end. Would like to commit this so I can move on. -- :wq Claudio Index: net/pfkeyv2.c =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2.c,v retrieving revision 1.162 diff -u -p -r1.162 pfkeyv2.c --- net/pfkeyv2.c 26 Jun 2017 09:32:32 -0000 1.162 +++ net/pfkeyv2.c 30 Jun 2017 14:28:36 -0000 @@ -131,14 +131,16 @@ extern struct radix_node_head **spd_tabl struct sockaddr pfkey_addr = { 2, PF_KEY, }; struct domain pfkeydomain; -struct pfkeyv2_socket { - LIST_ENTRY(pfkeyv2_socket) kcb_list; - struct socket *socket; +struct keycb { + struct rawcb rcb; + LIST_ENTRY(keycb) kcb_list; int flags; uint32_t pid; uint32_t registration; /* Increase size if SATYPE_MAX > 31 */ uint rdomain; }; +#define sotokeycb(so) ((struct keycb *)(so)->so_pcb) + struct dump_state { struct sadb_msg *sadb_msg; @@ -146,8 +148,7 @@ struct dump_state { }; /* Static globals */ -static LIST_HEAD(, pfkeyv2_socket) pfkeyv2_sockets = - LIST_HEAD_INITIALIZER(pfkeyv2_sockets); +static LIST_HEAD(, keycb) pfkeyv2_sockets = LIST_HEAD_INITIALIZER(keycb); static uint32_t pfkeyv2_seq = 1; static int nregistered = 0; static int npromisc = 0; @@ -160,7 +161,7 @@ int pfkeyv2_usrreq(struct socket *, int, struct mbuf *, struct proc *); int pfkeyv2_output(struct mbuf *, struct socket *, struct sockaddr *, struct mbuf *); -int pfkey_sendup(struct socket *socket, struct mbuf *packet, int more); +int pfkey_sendup(struct keycb *, struct mbuf *, int); /* * Wrapper around m_devget(); copy data from contiguous buffer to mbuf @@ -212,72 +213,62 @@ pfkey_init(void) int pfkeyv2_attach(struct socket *so, int proto) { - struct pfkeyv2_socket *pfkeyv2_socket; + struct rawcb *rp; + struct keycb *pk; int error; if ((so->so_state & SS_PRIV) == 0) return EACCES; - if (!(so->so_pcb = malloc(sizeof(struct rawcb), - M_PCB, M_DONTWAIT | M_ZERO))) - return (ENOMEM); - - error = raw_attach(so, so->so_proto->pr_protocol); - if (error) - goto ret; - - ((struct rawcb *)so->so_pcb)->rcb_faddr = &pfkey_addr; + pk = malloc(sizeof(struct keycb), M_PCB, M_WAITOK | M_ZERO); + rp = &pk->rcb; + so->so_pcb = rp; + + error = raw_attach(so, proto); + if (error) { + free(pk, M_PCB, sizeof(struct keycb)); + return (error); + } - if (!(pfkeyv2_socket = malloc(sizeof(struct pfkeyv2_socket), - M_PFKEY, M_NOWAIT | M_ZERO))) - return (ENOMEM); + so->so_options |= SO_USELOOPBACK; + soisconnected(so); - LIST_INSERT_HEAD(&pfkeyv2_sockets, pfkeyv2_socket, kcb_list); - pfkeyv2_socket->socket = so; - pfkeyv2_socket->pid = curproc->p_p->ps_pid; + rp->rcb_faddr = &pfkey_addr; + pk->pid = curproc->p_p->ps_pid; /* * XXX we should get this from the socket instead but * XXX rawcb doesn't store the rdomain like inpcb does. */ - pfkeyv2_socket->rdomain = rtable_l2(curproc->p_p->ps_rtableid); + pk->rdomain = rtable_l2(curproc->p_p->ps_rtableid); - so->so_options |= SO_USELOOPBACK; - soisconnected(so); + LIST_INSERT_HEAD(&pfkeyv2_sockets, pk, kcb_list); return (0); -ret: - free(so->so_pcb, M_PCB, sizeof(struct rawcb)); - return (error); } /* * Close a PF_KEYv2 socket. */ int -pfkeyv2_detach(struct socket *socket, struct proc *p) +pfkeyv2_detach(struct socket *so, struct proc *p) { - struct pfkeyv2_socket *pp; - int error; - - LIST_FOREACH(pp, &pfkeyv2_sockets, kcb_list) - if (pp->socket == socket) - break; + struct keycb *pk; - if (pp) { - LIST_REMOVE(pp, kcb_list); + pk = sotokeycb(so); + if (pk == NULL) + return ENOTCONN; - if (pp->flags & PFKEYV2_SOCKETFLAGS_REGISTERED) - nregistered--; + LIST_REMOVE(pk, kcb_list); - if (pp->flags & PFKEYV2_SOCKETFLAGS_PROMISC) - npromisc--; + if (pk->flags & PFKEYV2_SOCKETFLAGS_REGISTERED) + nregistered--; - free(pp, M_PFKEY, 0); - } + if (pk->flags & PFKEYV2_SOCKETFLAGS_PROMISC) + npromisc--; - error = raw_usrreq(socket, PRU_DETACH, NULL, NULL, NULL, p); - return (error); + raw_detach(&pk->rcb); + return (0); } int @@ -293,7 +284,7 @@ pfkeyv2_usrreq(struct socket *so, int re } int -pfkeyv2_output(struct mbuf *mbuf, struct socket *socket, +pfkeyv2_output(struct mbuf *mbuf, struct socket *so, struct sockaddr *dstaddr, struct mbuf *control) { void *message; @@ -319,7 +310,7 @@ pfkeyv2_output(struct mbuf *mbuf, struct m_copydata(mbuf, 0, mbuf->m_pkthdr.len, message); - error = pfkeyv2_send(socket, message, mbuf->m_pkthdr.len); + error = pfkeyv2_send(so, message, mbuf->m_pkthdr.len); ret: m_freem(mbuf); @@ -327,8 +318,9 @@ ret: } int -pfkey_sendup(struct socket *so, struct mbuf *packet, int more) +pfkey_sendup(struct keycb *kp, struct mbuf *packet, int more) { + struct socket *so = kp->rcb.rcb_socket; struct mbuf *packet2; NET_ASSERT_LOCKED(); @@ -354,13 +346,13 @@ pfkey_sendup(struct socket *so, struct m * third argument. */ int -pfkeyv2_sendmessage(void **headers, int mode, struct socket *socket, +pfkeyv2_sendmessage(void **headers, int mode, struct socket *so, u_int8_t satype, int count, u_int rdomain) { int i, j, rval; void *p, *buffer = NULL; struct mbuf *packet; - struct pfkeyv2_socket *s; + struct keycb *s; struct sadb_msg *smsg; /* Find out how much space we'll need... */ @@ -401,7 +393,7 @@ pfkeyv2_sendmessage(void **headers, int * Send message to the specified socket, plus all * promiscuous listeners. */ - pfkey_sendup(socket, packet, 0); + pfkey_sendup(sotokeycb(so), packet, 0); /* * Promiscuous messages contain the original message @@ -426,9 +418,9 @@ pfkeyv2_sendmessage(void **headers, int */ LIST_FOREACH(s, &pfkeyv2_sockets, kcb_list) { if ((s->flags & PFKEYV2_SOCKETFLAGS_PROMISC) && - (s->socket != socket) && + (s->rcb.rcb_socket != so) && (s->rdomain == rdomain)) - pfkey_sendup(s->socket, packet, 1); + pfkey_sendup(s, packet, 1); } m_freem(packet); break; @@ -442,11 +434,11 @@ pfkeyv2_sendmessage(void **headers, int if ((s->flags & PFKEYV2_SOCKETFLAGS_REGISTERED) && (s->rdomain == rdomain)) { if (!satype) /* Just send to everyone registered */ - pfkey_sendup(s->socket, packet, 1); + pfkey_sendup(s, packet, 1); else { /* Check for specified satype */ if ((1 << satype) & s->registration) - pfkey_sendup(s->socket, packet, 1); + pfkey_sendup(s, packet, 1); } } } @@ -472,7 +464,7 @@ pfkeyv2_sendmessage(void **headers, int if ((s->flags & PFKEYV2_SOCKETFLAGS_PROMISC) && !(s->flags & PFKEYV2_SOCKETFLAGS_REGISTERED) && (s->rdomain == rdomain)) - pfkey_sendup(s->socket, packet, 1); + pfkey_sendup(s, packet, 1); } m_freem(packet); break; @@ -481,7 +473,7 @@ pfkeyv2_sendmessage(void **headers, int /* Send message to all sockets */ LIST_FOREACH(s, &pfkeyv2_sockets, kcb_list) { if (s->rdomain == rdomain) - pfkey_sendup(s->socket, packet, 1); + pfkey_sendup(s, packet, 1); } m_freem(packet); break; @@ -940,7 +932,7 @@ pfkeyv2_get_proto_alg(u_int8_t satype, u * Handle all messages from userland to kernel. */ int -pfkeyv2_send(struct socket *socket, void *message, int len) +pfkeyv2_send(struct socket *so, void *message, int len) { int i, j, s, rval = 0, mode = PFKEYV2_SENDMESSAGE_BROADCAST; int delflag = 0; @@ -950,7 +942,7 @@ pfkeyv2_send(struct socket *socket, void struct radix_node_head *rnh; struct radix_node *rn = NULL; - struct pfkeyv2_socket *pfkeyv2_socket, *so = NULL; + struct keycb *pk, *bpk = NULL; void *freeme = NULL, *bckptr = NULL; void *headers[SADB_EXT_MAX + 1]; @@ -972,16 +964,14 @@ pfkeyv2_send(struct socket *socket, void /* Verify that we received this over a legitimate pfkeyv2 socket */ bzero(headers, sizeof(headers)); - LIST_FOREACH(pfkeyv2_socket, &pfkeyv2_sockets, kcb_list) - if (pfkeyv2_socket->socket == socket) - break; + pk = sotokeycb(so); - if (!pfkeyv2_socket) { + if (!pk) { rval = EINVAL; goto ret; } - rdomain = pfkeyv2_socket->rdomain; + rdomain = pk->rdomain; /* If we have any promiscuous listeners, send them a copy of the message */ if (npromisc) { @@ -1010,10 +1000,10 @@ pfkeyv2_send(struct socket *socket, void goto ret; /* Send to all promiscuous listeners */ - LIST_FOREACH(so, &pfkeyv2_sockets, kcb_list) { - if ((so->flags & PFKEYV2_SOCKETFLAGS_PROMISC) && - (so->rdomain == rdomain)) - pfkey_sendup(so->socket, packet, 1); + LIST_FOREACH(bpk, &pfkeyv2_sockets, kcb_list) { + if ((bpk->flags & PFKEYV2_SOCKETFLAGS_PROMISC) && + (bpk->rdomain == rdomain)) + pfkey_sendup(bpk, packet, 1); } m_freem(packet); @@ -1402,8 +1392,8 @@ pfkeyv2_send(struct socket *socket, void break; case SADB_REGISTER: - if (!(pfkeyv2_socket->flags & PFKEYV2_SOCKETFLAGS_REGISTERED)) { - pfkeyv2_socket->flags |= PFKEYV2_SOCKETFLAGS_REGISTERED; + if (!(pk->flags & PFKEYV2_SOCKETFLAGS_REGISTERED)) { + pk->flags |= PFKEYV2_SOCKETFLAGS_REGISTERED; nregistered++; } @@ -1433,7 +1423,7 @@ pfkeyv2_send(struct socket *socket, void } /* Keep track what this socket has registered for */ - pfkeyv2_socket->registration |= (1 << ((struct sadb_msg *)message)->sadb_msg_satype); + pk->registration |= (1 << ((struct sadb_msg *)message)->sadb_msg_satype); ssup = (struct sadb_supported *) freeme; ssup->sadb_supported_len = i / sizeof(uint64_t); @@ -1506,7 +1496,7 @@ pfkeyv2_send(struct socket *socket, void { struct dump_state dump_state; dump_state.sadb_msg = (struct sadb_msg *) headers[0]; - dump_state.socket = socket; + dump_state.socket = so; rval = tdb_walk(rdomain, pfkeyv2_dump_walker, &dump_state); if (!rval) @@ -1778,12 +1768,12 @@ pfkeyv2_send(struct socket *socket, void if ((rval = pfdatatopacket(message, len, &packet)) != 0) goto ret; - LIST_FOREACH(so, &pfkeyv2_sockets, kcb_list) - if ((so != pfkeyv2_socket) && - (so->rdomain == rdomain) && + LIST_FOREACH(bpk, &pfkeyv2_sockets, kcb_list) + if ((bpk != pk) && + (bpk->rdomain == rdomain) && (!smsg->sadb_msg_seq || - (smsg->sadb_msg_seq == pfkeyv2_socket->pid))) - pfkey_sendup(so->socket, packet, 1); + (smsg->sadb_msg_seq == pk->pid))) + pfkey_sendup(bpk, packet, 1); m_freem(packet); } else { @@ -1792,17 +1782,17 @@ pfkeyv2_send(struct socket *socket, void goto ret; } - i = (pfkeyv2_socket->flags & + i = (pk->flags & PFKEYV2_SOCKETFLAGS_PROMISC) ? 1 : 0; j = smsg->sadb_msg_satype ? 1 : 0; if (i ^ j) { if (j) { - pfkeyv2_socket->flags |= + pk->flags |= PFKEYV2_SOCKETFLAGS_PROMISC; npromisc++; } else { - pfkeyv2_socket->flags &= + pk->flags &= ~PFKEYV2_SOCKETFLAGS_PROMISC; npromisc--; } @@ -1841,7 +1831,7 @@ ret: goto realret; } - rval = pfkeyv2_sendmessage(headers, mode, socket, 0, 0, rdomain); + rval = pfkeyv2_sendmessage(headers, mode, so, 0, 0, rdomain); realret: NET_UNLOCK(s); Index: net/raw_cb.c =================================================================== RCS file: /cvs/src/sys/net/raw_cb.c,v retrieving revision 1.11 diff -u -p -r1.11 raw_cb.c --- net/raw_cb.c 24 Jan 2017 10:08:30 -0000 1.11 +++ net/raw_cb.c 30 May 2017 08:44:05 -0000 @@ -46,16 +46,10 @@ /* * Routines to manage the raw protocol control blocks. - * - * TODO: - * hash lookups by protocol family/protocol + address family - * take care of unique address problems per AF? - * redo address binding to allow wildcards */ u_long raw_sendspace = RAWSNDQ; u_long raw_recvspace = RAWRCVQ; -struct rawcbhead rawcb; /* * Allocate a control block and a nominal amount @@ -72,14 +66,13 @@ raw_attach(struct socket *so, int proto) * after space has been allocated for the * rawcb. */ - if (rp == 0) + if (rp == NULL) return (ENOBUFS); if ((error = soreserve(so, raw_sendspace, raw_recvspace)) != 0) return (error); rp->rcb_socket = so; rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family; rp->rcb_proto.sp_protocol = proto; - LIST_INSERT_HEAD(&rawcb, rp, rcb_list); return (0); } @@ -94,7 +87,6 @@ raw_detach(struct rawcb *rp) so->so_pcb = 0; sofree(so); - LIST_REMOVE(rp, rcb_list); free((caddr_t)(rp), M_PCB, 0); } @@ -104,7 +96,6 @@ raw_detach(struct rawcb *rp) void raw_disconnect(struct rawcb *rp) { - if (rp->rcb_socket->so_state & SS_NOFDREF) raw_detach(rp); } Index: net/raw_cb.h =================================================================== RCS file: /cvs/src/sys/net/raw_cb.h,v retrieving revision 1.11 diff -u -p -r1.11 raw_cb.h --- net/raw_cb.h 23 Jan 2017 16:31:24 -0000 1.11 +++ net/raw_cb.h 30 May 2017 08:44:05 -0000 @@ -40,7 +40,6 @@ * to tie a socket to the generic raw interface. */ struct rawcb { - LIST_ENTRY(rawcb) rcb_list; /* doubly linked list */ struct socket *rcb_socket; /* back pointer to socket */ struct sockaddr *rcb_faddr; /* destination address */ struct sockaddr *rcb_laddr; /* socket's address */ @@ -54,8 +53,6 @@ struct rawcb { #define RAWRCVQ 8192 #ifdef _KERNEL - -extern LIST_HEAD(rawcbhead, rawcb) rawcb; /* head of list */ #define sotorawcb(so) ((struct rawcb *)(so)->so_pcb) int raw_attach(struct socket *, int); Index: net/raw_usrreq.c =================================================================== RCS file: /cvs/src/sys/net/raw_usrreq.c,v retrieving revision 1.31 diff -u -p -r1.31 raw_usrreq.c --- net/raw_usrreq.c 13 Mar 2017 20:18:21 -0000 1.31 +++ net/raw_usrreq.c 30 May 2017 08:44:05 -0000 @@ -45,15 +45,6 @@ #include <net/raw_cb.h> #include <sys/stdarg.h> -/* - * Initialize raw connection block q. - */ -void -raw_init(void) -{ - - LIST_INIT(&rawcb); -} int raw_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, @@ -71,7 +62,7 @@ raw_usrreq(struct socket *so, int req, s m_freem(m); return (EOPNOTSUPP); } - if (rp == 0) { + if (rp == NULL) { m_freem(m); return (EINVAL); } @@ -81,10 +72,6 @@ raw_usrreq(struct socket *so, int req, s * Flush data or not depending on the options. */ case PRU_DETACH: - if (rp == 0) { - error = ENOTCONN; - break; - } raw_detach(rp); break; Index: net/rtsock.c =================================================================== RCS file: /cvs/src/sys/net/rtsock.c,v retrieving revision 1.239 diff -u -p -r1.239 rtsock.c --- net/rtsock.c 26 Jun 2017 09:32:32 -0000 1.239 +++ net/rtsock.c 30 Jun 2017 14:29:54 -0000 @@ -98,6 +98,7 @@ struct walkarg { caddr_t w_where, w_tmem; }; +void route_prinit(void); int route_output(struct mbuf *, struct socket *, struct sockaddr *, struct mbuf *); int route_ctloutput(int, struct socket *, int, int, struct mbuf *); @@ -126,19 +127,21 @@ int sysctl_ifnames(struct walkarg *); int sysctl_rtable_rtstat(void *, size_t *, void *); struct routecb { - struct rawcb rcb; - struct timeout timeout; - unsigned int msgfilter; - unsigned int flags; - u_int rtableid; + struct rawcb rcb; + LIST_ENTRY(routecb) rcb_list; + struct timeout timeout; + unsigned int msgfilter; + unsigned int flags; + u_int rtableid; }; #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) struct route_cb { - int ip_count; - int ip6_count; - int mpls_count; - int any_count; + LIST_HEAD(, routecb) rcb; + int ip_count; + int ip6_count; + int mpls_count; + int any_count; }; struct route_cb route_cb; @@ -154,45 +157,53 @@ struct route_cb route_cb; #define ROUTE_DESYNC_RESEND_TIMEOUT (hz / 5) /* In hz */ +void +route_prinit(void) +{ + LIST_INIT(&route_cb.rcb); +} + + int route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p) { - struct rawcb *rp; struct routecb *rop; int af; int error = 0; - rp = sotorawcb(so); + rop = sotoroutecb(so); + if (rop == NULL) { + m_freem(m); + return (EINVAL); + } switch (req) { case PRU_RCVD: - rop = (struct routecb *)rp; - /* * If we are in a FLUSH state, check if the buffer is * empty so that we can clear the flag. */ if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) && - ((sbspace(rp->rcb_socket, &rp->rcb_socket->so_rcv) == - rp->rcb_socket->so_rcv.sb_hiwat))) + ((sbspace(rop->rcb.rcb_socket, + &rop->rcb.rcb_socket->so_rcv) == + rop->rcb.rcb_socket->so_rcv.sb_hiwat))) rop->flags &= ~ROUTECB_FLAG_FLUSH; break; case PRU_DETACH: - if (rp) { - timeout_del(&((struct routecb *)rp)->timeout); - af = rp->rcb_proto.sp_protocol; - if (af == AF_INET) - route_cb.ip_count--; - else if (af == AF_INET6) - route_cb.ip6_count--; + timeout_del(&rop->timeout); + af = rop->rcb.rcb_proto.sp_protocol; + if (af == AF_INET) + route_cb.ip_count--; + else if (af == AF_INET6) + route_cb.ip6_count--; #ifdef MPLS - else if (af == AF_MPLS) - route_cb.mpls_count--; + else if (af == AF_MPLS) + route_cb.mpls_count--; #endif - route_cb.any_count--; - } + route_cb.any_count--; + LIST_REMOVE(rop, rcb_list); /* FALLTHROUGH */ default: error = raw_usrreq(so, req, m, nam, control, p); @@ -206,8 +217,7 @@ route_attach(struct socket *so, int prot { struct rawcb *rp; struct routecb *rop; - int af; - int error = 0; + int error; /* * use the rawcb but allocate a routecb, this @@ -229,21 +239,28 @@ route_attach(struct socket *so, int prot return (error); } rop->rtableid = curproc->p_p->ps_rtableid; - af = rp->rcb_proto.sp_protocol; - if (af == AF_INET) + switch (rp->rcb_proto.sp_protocol) { + case AF_INET: route_cb.ip_count++; - else if (af == AF_INET6) + break; + case AF_INET6: route_cb.ip6_count++; + break; #ifdef MPLS - else if (af == AF_MPLS) + case AF_MPLS: route_cb.mpls_count++; + break; #endif - rp->rcb_faddr = &route_src; - route_cb.any_count++; + } + soisconnected(so); so->so_options |= SO_USELOOPBACK; - return (error); + rp->rcb_faddr = &route_src; + route_cb.any_count++; + LIST_INSERT_HEAD(&route_cb.rcb, rop, rcb_list); + + return (0); } int @@ -360,10 +377,11 @@ route_input(struct mbuf *m0, struct sock return; } - LIST_FOREACH(rp, &rawcb, rcb_list) { - if (rp->rcb_socket->so_state & SS_CANTRCVMORE) + LIST_FOREACH(rop, &route_cb.rcb, rcb_list) { + rp = &rop->rcb; + if (!(rp->rcb_socket->so_state & SS_ISCONNECTED)) continue; - if (rp->rcb_proto.sp_family != PF_ROUTE) + if (rp->rcb_socket->so_state & SS_CANTRCVMORE) continue; /* Check to see if we don't want our own messages. */ if (so == rp->rcb_socket && !(so->so_options & SO_USELOOPBACK)) @@ -378,23 +396,8 @@ route_input(struct mbuf *m0, struct sock sa_family != AF_UNSPEC && rp->rcb_proto.sp_protocol != sa_family) continue; - /* - * We assume the lower level routines have - * placed the address in a canonical format - * suitable for a structure comparison. - * - * Note that if the lengths are not the same - * the comparison will fail at the first byte. - */ -#define equal(a1, a2) \ - (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0) - if (rp->rcb_laddr && !equal(rp->rcb_laddr, sodst)) - continue; - if (rp->rcb_faddr && !equal(rp->rcb_faddr, sosrc)) - continue; /* filter messages that the process does not want */ - rop = (struct routecb *)rp; rtm = mtod(m, struct rt_msghdr *); /* but RTM_DESYNC can't be filtered */ if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 && @@ -1787,7 +1790,7 @@ struct protosw routesw[] = { .pr_ctloutput = route_ctloutput, .pr_usrreq = route_usrreq, .pr_attach = route_attach, - .pr_init = raw_init, + .pr_init = route_prinit, .pr_sysctl = sysctl_rtable } };