I'd like to enforce that pr_usrreq functions are always called at IPL_SOFTNET. This will allow us to keep locking simple as soon as we trade splsoftnet() for a rwlock.
Most of the PRU_* actions are already called under splsoftnet() and the ones that are not are relatively small, so it should not really matter since processes are already serialized by the KERNEL_LOCK(). I'd even argue that this is a step forward removing the KERNEL_LOCK from the socket layer. Comments, oks? Index: kern/sys_socket.c =================================================================== RCS file: /cvs/src/sys/kern/sys_socket.c,v retrieving revision 1.22 diff -u -p -r1.22 sys_socket.c --- kern/sys_socket.c 6 Oct 2016 17:02:10 -0000 1.22 +++ kern/sys_socket.c 16 Nov 2016 09:58:54 -0000 @@ -73,6 +73,7 @@ int soo_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p) { struct socket *so = (struct socket *)fp->f_data; + int s, error = 0; switch (cmd) { @@ -122,8 +123,12 @@ soo_ioctl(struct file *fp, u_long cmd, c return (ifioctl(so, cmd, data, p)); if (IOCGROUP(cmd) == 'r') return (rtioctl(cmd, data, p)); - return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, + s = splsoftnet(); + error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)NULL, p)); + splx(s); + + return (error); } int @@ -167,6 +172,7 @@ int soo_stat(struct file *fp, struct stat *ub, struct proc *p) { struct socket *so = fp->f_data; + int s; memset(ub, 0, sizeof (*ub)); ub->st_mode = S_IFSOCK; @@ -177,8 +183,10 @@ soo_stat(struct file *fp, struct stat *u ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH; ub->st_uid = so->so_euid; ub->st_gid = so->so_egid; + s = splsoftnet(); (void) ((*so->so_proto->pr_usrreq)(so, PRU_SENSE, (struct mbuf *)ub, NULL, NULL, p)); + splx(s); return (0); } Index: kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.164 diff -u -p -r1.164 uipc_socket.c --- kern/uipc_socket.c 14 Nov 2016 08:45:30 -0000 1.164 +++ kern/uipc_socket.c 16 Nov 2016 10:14:05 -0000 @@ -652,8 +652,10 @@ soreceive(struct socket *so, struct mbuf flags |= MSG_DONTWAIT; if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); + s = splsoftnet(); error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc); + splx(s); if (error) goto bad; do { Index: kern/uipc_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.139 diff -u -p -r1.139 uipc_syscalls.c --- kern/uipc_syscalls.c 9 Nov 2016 09:39:43 -0000 1.139 +++ kern/uipc_syscalls.c 16 Nov 2016 10:17:08 -0000 @@ -1066,7 +1066,7 @@ sys_getsockname(struct proc *p, void *v, struct socket *so; struct mbuf *m = NULL; socklen_t len; - int error; + int error, s; if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0) return (error); @@ -1078,14 +1078,15 @@ sys_getsockname(struct proc *p, void *v, if (error) goto bad; m = m_getclr(M_WAIT, MT_SONAME); + s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0, p); + splx(s); if (error) goto bad; error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen)); bad: FRELE(fp, p); - if (m) - m_freem(m); + m_freem(m); return (error); } @@ -1104,7 +1105,7 @@ sys_getpeername(struct proc *p, void *v, struct socket *so; struct mbuf *m = NULL; socklen_t len; - int error; + int error, s; if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0) return (error); @@ -1120,7 +1121,9 @@ sys_getpeername(struct proc *p, void *v, if (error) goto bad; m = m_getclr(M_WAIT, MT_SONAME); + s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0, p); + splx(s); if (error) goto bad; error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen)); Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.461 diff -u -p -r1.461 if.c --- net/if.c 14 Nov 2016 10:52:04 -0000 1.461 +++ net/if.c 16 Nov 2016 09:58:54 -0000 @@ -2046,9 +2046,11 @@ ifioctl(struct socket *so, u_long cmd, c default: if (so->so_proto == 0) return (EOPNOTSUPP); + s = splsoftnet(); error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, (struct mbuf *) cmd, (struct mbuf *) data, (struct mbuf *) ifp, p)); + splx(s); break; } Index: net/raw_usrreq.c =================================================================== RCS file: /cvs/src/sys/net/raw_usrreq.c,v retrieving revision 1.25 diff -u -p -r1.25 raw_usrreq.c --- net/raw_usrreq.c 8 Oct 2016 03:32:25 -0000 1.25 +++ net/raw_usrreq.c 16 Nov 2016 09:58:54 -0000 @@ -137,7 +137,9 @@ raw_usrreq(struct socket *so, int req, s { struct rawcb *rp = sotorawcb(so); int error = 0; - int len, s; + int len; + + splsoftassert(IPL_SOFTNET); if (req == PRU_CONTROL) return (EOPNOTSUPP); @@ -149,7 +151,6 @@ raw_usrreq(struct socket *so, int req, s m_freem(m); return (EINVAL); } - s = splsoftnet(); switch (req) { /* @@ -230,7 +231,6 @@ raw_usrreq(struct socket *so, int req, s /* * stat: don't bother with a blocksize. */ - splx(s); return (0); /* @@ -238,7 +238,6 @@ raw_usrreq(struct socket *so, int req, s */ case PRU_RCVOOB: case PRU_RCVD: - splx(s); return (EOPNOTSUPP); case PRU_LISTEN: @@ -270,7 +269,6 @@ raw_usrreq(struct socket *so, int req, s default: panic("raw_usrreq"); } - splx(s); m_freem(m); return (error); } Index: net/rtsock.c =================================================================== RCS file: /cvs/src/sys/net/rtsock.c,v retrieving revision 1.208 diff -u -p -r1.208 rtsock.c --- net/rtsock.c 18 Oct 2016 11:05:45 -0000 1.208 +++ net/rtsock.c 16 Nov 2016 09:58:54 -0000 @@ -149,10 +149,11 @@ route_usrreq(struct socket *so, int req, { struct rawcb *rp; struct routecb *rop; - int s, af; + int af; int error = 0; - s = splsoftnet(); + splsoftassert(IPL_SOFTNET); + rp = sotorawcb(so); switch (req) { @@ -178,7 +179,6 @@ route_usrreq(struct socket *so, int req, error = raw_attach(so, (int)(long)nam); if (error) { free(rop, M_PCB, sizeof(struct routecb)); - splx(s); return (error); } rop->rtableid = curproc->p_p->ps_rtableid; @@ -229,7 +229,6 @@ route_usrreq(struct socket *so, int req, error = raw_usrreq(so, req, m, nam, control, p); } - splx(s); return (error); } Index: netinet/ip_divert.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_divert.c,v retrieving revision 1.39 diff -u -p -r1.39 ip_divert.c --- netinet/ip_divert.c 7 Mar 2016 18:44:00 -0000 1.39 +++ netinet/ip_divert.c 16 Nov 2016 09:58:54 -0000 @@ -63,7 +63,6 @@ int divbhashsize = DIVERTHASHSIZE; static struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; -void divert_detach(struct inpcb *); int divert_output(struct inpcb *, struct mbuf *, struct mbuf *, struct mbuf *); void @@ -248,7 +247,8 @@ divert_usrreq(struct socket *so, int req { struct inpcb *inp = sotoinpcb(so); int error = 0; - int s; + + splsoftassert(IPL_SOFTNET); if (req == PRU_CONTROL) { return (in_control(so, (u_long)m, (caddr_t)addr, @@ -269,9 +269,7 @@ divert_usrreq(struct socket *so, int req error = EACCES; break; } - s = splsoftnet(); error = in_pcballoc(so, &divbtable); - splx(s); if (error) break; @@ -282,13 +280,11 @@ divert_usrreq(struct socket *so, int req break; case PRU_DETACH: - divert_detach(inp); + in_pcbdetach(inp); break; case PRU_BIND: - s = splsoftnet(); error = in_pcbbind(inp, addr, p); - splx(s); break; case PRU_SHUTDOWN: @@ -300,7 +296,7 @@ divert_usrreq(struct socket *so, int req case PRU_ABORT: soisdisconnected(so); - divert_detach(inp); + in_pcbdetach(inp); break; case PRU_SOCKADDR: @@ -339,15 +335,6 @@ release: m_freem(control); m_freem(m); return (error); -} - -void -divert_detach(struct inpcb *inp) -{ - int s = splsoftnet(); - - in_pcbdetach(inp); - splx(s); } /* Index: netinet/raw_ip.c =================================================================== RCS file: /cvs/src/sys/netinet/raw_ip.c,v retrieving revision 1.87 diff -u -p -r1.87 raw_ip.c --- netinet/raw_ip.c 14 Nov 2016 03:51:53 -0000 1.87 +++ netinet/raw_ip.c 16 Nov 2016 09:58:54 -0000 @@ -409,7 +409,8 @@ rip_usrreq(struct socket *so, int req, s { struct inpcb *inp = sotoinpcb(so); int error = 0; - int s; + + splsoftassert(IPL_SOFTNET); if (req == PRU_CONTROL) return (in_control(so, (u_long)m, (caddr_t)nam, @@ -433,13 +434,10 @@ rip_usrreq(struct socket *so, int req, s error = EPROTONOSUPPORT; break; } - s = splsoftnet(); if ((error = soreserve(so, rip_sendspace, rip_recvspace)) || (error = in_pcballoc(so, &rawcbtable))) { - splx(s); break; } - splx(s); inp = sotoinpcb(so); inp->inp_ip.ip_p = (long)nam; break; Index: netinet/tcp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.135 diff -u -p -r1.135 tcp_usrreq.c --- netinet/tcp_usrreq.c 24 Sep 2016 14:51:37 -0000 1.135 +++ netinet/tcp_usrreq.c 16 Nov 2016 09:58:54 -0000 @@ -130,10 +130,11 @@ tcp_usrreq(struct socket *so, int req, s struct sockaddr_in *sin; struct inpcb *inp; struct tcpcb *tp = NULL; - int s; int error = 0; short ostate; + splsoftassert(IPL_SOFTNET); + if (req == PRU_CONTROL) { #ifdef INET6 if (sotopf(so) == PF_INET6) @@ -150,7 +151,6 @@ tcp_usrreq(struct socket *so, int req, s return (EINVAL); } - s = splsoftnet(); inp = sotoinpcb(so); /* * When a TCP is attached to a socket, then there will be @@ -161,7 +161,6 @@ tcp_usrreq(struct socket *so, int req, s error = so->so_error; if (error == 0) error = EINVAL; - splx(s); /* * The following corrects an mbuf leak under rare * circumstances @@ -174,7 +173,6 @@ tcp_usrreq(struct socket *so, int req, s tp = intotcpcb(inp); /* tp might get 0 when using socket splicing */ if (tp == NULL) { - splx(s); return (0); } #ifdef KPROF @@ -382,7 +380,6 @@ tcp_usrreq(struct socket *so, int req, s case PRU_SENSE: ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; - splx(s); return (0); case PRU_RCVOOB: @@ -447,7 +444,6 @@ tcp_usrreq(struct socket *so, int req, s } if (tp && (so->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); - splx(s); return (error); } Index: netinet/udp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v retrieving revision 1.220 diff -u -p -r1.220 udp_usrreq.c --- netinet/udp_usrreq.c 3 Nov 2016 18:42:35 -0000 1.220 +++ netinet/udp_usrreq.c 16 Nov 2016 09:58:54 -0000 @@ -1105,7 +1105,8 @@ udp_usrreq(struct socket *so, int req, s { struct inpcb *inp; int error = 0; - int s; + + splsoftassert(IPL_SOFTNET); if (req == PRU_CONTROL) { #ifdef INET6 @@ -1118,7 +1119,6 @@ udp_usrreq(struct socket *so, int req, s (struct ifnet *)control)); } - s = splsoftnet(); inp = sotoinpcb(so); if (inp == NULL && req != PRU_ATTACH) { error = EINVAL; @@ -1255,7 +1255,6 @@ udp_usrreq(struct socket *so, int req, s else #endif error = udp_output(inp, m, addr, control); - splx(s); return (error); case PRU_ABORT: @@ -1289,7 +1288,6 @@ udp_usrreq(struct socket *so, int req, s * Perhaps Path MTU might be returned for a connected * UDP socket in this case. */ - splx(s); return (0); case PRU_SENDOOB: @@ -1302,7 +1300,6 @@ udp_usrreq(struct socket *so, int req, s case PRU_RCVD: case PRU_RCVOOB: - splx(s); return (EOPNOTSUPP); /* do not free mbuf's */ default: @@ -1310,7 +1307,6 @@ udp_usrreq(struct socket *so, int req, s } release: - splx(s); m_freem(control); m_freem(m); return (error); Index: netinet6/ip6_divert.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v retrieving revision 1.41 diff -u -p -r1.41 ip6_divert.c --- netinet6/ip6_divert.c 29 Mar 2016 11:57:51 -0000 1.41 +++ netinet6/ip6_divert.c 16 Nov 2016 09:58:54 -0000 @@ -64,7 +64,6 @@ int divb6hashsize = DIVERTHASHSIZE; static struct sockaddr_in6 ip6addr = { sizeof(ip6addr), AF_INET6 }; -void divert6_detach(struct inpcb *); int divert6_output(struct inpcb *, struct mbuf *, struct mbuf *, struct mbuf *); @@ -251,7 +250,8 @@ divert6_usrreq(struct socket *so, int re { struct inpcb *inp = sotoinpcb(so); int error = 0; - int s; + + splsoftassert(IPL_SOFTNET); if (req == PRU_CONTROL) { return (in6_control(so, (u_long)m, (caddr_t)addr, @@ -272,9 +272,7 @@ divert6_usrreq(struct socket *so, int re error = EACCES; break; } - s = splsoftnet(); error = in_pcballoc(so, &divb6table); - splx(s); if (error) break; @@ -285,13 +283,11 @@ divert6_usrreq(struct socket *so, int re break; case PRU_DETACH: - divert6_detach(inp); + in_pcbdetach(inp); break; case PRU_BIND: - s = splsoftnet(); error = in_pcbbind(inp, addr, p); - splx(s); break; case PRU_SHUTDOWN: @@ -303,7 +299,7 @@ divert6_usrreq(struct socket *so, int re case PRU_ABORT: soisdisconnected(so); - divert6_detach(inp); + in_pcbdetach(inp); break; case PRU_SOCKADDR: @@ -342,15 +338,6 @@ release: m_freem(control); m_freem(m); return (error); -} - -void -divert6_detach(struct inpcb *inp) -{ - int s = splsoftnet(); - - in_pcbdetach(inp); - splx(s); } /* Index: netinet6/raw_ip6.c =================================================================== RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v retrieving revision 1.98 diff -u -p -r1.98 raw_ip6.c --- netinet6/raw_ip6.c 25 Oct 2016 19:40:57 -0000 1.98 +++ netinet6/raw_ip6.c 16 Nov 2016 09:58:54 -0000 @@ -568,9 +568,10 @@ rip6_usrreq(struct socket *so, int req, { struct inpcb *in6p = sotoinpcb(so); int error = 0; - int s; int priv; + splsoftassert(IPL_SOFTNET); + priv = 0; if ((so->so_state & SS_PRIV) != 0) priv++; @@ -591,13 +592,10 @@ rip6_usrreq(struct socket *so, int req, error = EPROTONOSUPPORT; break; } - s = splsoftnet(); if ((error = soreserve(so, rip6_sendspace, rip6_recvspace)) || (error = in_pcballoc(so, &rawin6pcbtable))) { - splx(s); break; } - splx(s); in6p = sotoinpcb(so); in6p->inp_ipv6.ip6_nxt = (long)nam; in6p->inp_cksum6 = -1;