sys_connect() currently relies on the KERNEL_LOCK() to be sure `so_state' isn't changing while it manipulates the socket. Since we want to remove the KERNEL_LOCK() from the Network Stack, we need a way to guaranty this atomicity.
Diff below extends the scope of the socket lock. It has been previously introduced in sys_connect(), first as NET_LOCK() then renamed, where old splsofnet() were used. But we now need to "move the line up" in order to protect fields currently relying on the KERNEL_LOCK(). Moving the line up means that soconnect() and soconnect2() will now assert for the socket lock. This will allow to remove superfluous lock- unlock dances, especially in NFS. ok? Index: kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.194 diff -u -p -r1.194 uipc_socket.c --- kern/uipc_socket.c 13 Jul 2017 16:19:38 -0000 1.194 +++ kern/uipc_socket.c 18 Jul 2017 06:46:18 -0000 @@ -311,11 +311,12 @@ soaccept(struct socket *so, struct mbuf int soconnect(struct socket *so, struct mbuf *nam) { - int s, error; + int error; + + soassertlocked(so); if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); - s = solock(so); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. @@ -329,19 +330,17 @@ soconnect(struct socket *so, struct mbuf else error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, NULL, nam, NULL, curproc); - sounlock(s); return (error); } int soconnect2(struct socket *so1, struct socket *so2) { - int s, error; + int error; - s = solock(so1); + soassertlocked(so1); error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, (struct mbuf *)so2, NULL, curproc); - sounlock(s); return (error); } Index: kern/uipc_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.153 diff -u -p -r1.153 uipc_syscalls.c --- kern/uipc_syscalls.c 12 Jul 2017 09:25:47 -0000 1.153 +++ kern/uipc_syscalls.c 18 Jul 2017 06:46:18 -0000 @@ -373,9 +373,10 @@ sys_connect(struct proc *p, void *v, reg if ((error = getsock(p, SCARG(uap, s), &fp)) != 0) return (error); so = fp->f_data; + s = solock(so); if (so->so_state & SS_ISCONNECTING) { - FRELE(fp, p); - return (EALREADY); + error = EALREADY; + goto out; } error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), MT_SONAME); @@ -393,11 +394,8 @@ sys_connect(struct proc *p, void *v, reg if (isdnssocket(so)) { u_int namelen = nam->m_len; error = dns_portcheck(p, so, mtod(nam, void *), &namelen); - if (error) { - FRELE(fp, p); - m_freem(nam); - return (error); - } + if (error) + goto out; nam->m_len = namelen; } @@ -405,11 +403,9 @@ sys_connect(struct proc *p, void *v, reg if (error) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { - FRELE(fp, p); - m_freem(nam); - return (EINPROGRESS); + error = EINPROGRESS; + goto out; } - s = solock(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = sosleep(so, &so->so_timeo, PSOCK | PCATCH, "netcon2", 0); @@ -423,10 +419,11 @@ sys_connect(struct proc *p, void *v, reg error = so->so_error; so->so_error = 0; } - sounlock(s); bad: if (!interrupted) so->so_state &= ~SS_ISCONNECTING; +out: + sounlock(s); FRELE(fp, p); m_freem(nam); if (error == ERESTART) Index: miscfs/fifofs/fifo_vnops.c =================================================================== RCS file: /cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v retrieving revision 1.57 diff -u -p -r1.57 fifo_vnops.c --- miscfs/fifofs/fifo_vnops.c 8 Jul 2017 09:19:02 -0000 1.57 +++ miscfs/fifofs/fifo_vnops.c 18 Jul 2017 06:46:18 -0000 @@ -124,7 +124,7 @@ fifo_open(void *v) struct fifoinfo *fip; struct proc *p = ap->a_p; struct socket *rso, *wso; - int error; + int s, error; if ((fip = vp->v_fifoinfo) == NULL) { fip = malloc(sizeof(*fip), M_VNODE, M_WAITOK); @@ -135,6 +135,11 @@ fifo_open(void *v) return (error); } fip->fi_readsock = rso; + /* + * XXXSMP + * We only lock `wso' because AF_LOCAL sockets are + * still relying on the KERNEL_LOCK(). + */ if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0)) != 0) { (void)soclose(rso); free(fip, M_VNODE, sizeof *fip); @@ -142,7 +147,9 @@ fifo_open(void *v) return (error); } fip->fi_writesock = wso; + s = solock(wso); if ((error = soconnect2(wso, rso)) != 0) { + sounlock(s); (void)soclose(wso); (void)soclose(rso); free(fip, M_VNODE, sizeof *fip); @@ -152,11 +159,15 @@ fifo_open(void *v) fip->fi_readers = fip->fi_writers = 0; wso->so_state |= SS_CANTSENDMORE; wso->so_snd.sb_lowat = PIPE_BUF; + } else { + rso = fip->fi_readsock; + wso = fip->fi_writesock; + s = solock(wso); } if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { - fip->fi_writesock->so_state &= ~SS_CANTSENDMORE; + wso->so_state &= ~SS_CANTSENDMORE; if (fip->fi_writers > 0) wakeup(&fip->fi_writers); } @@ -168,11 +179,12 @@ fifo_open(void *v) goto bad; } if (fip->fi_writers == 1) { - fip->fi_readsock->so_state &= ~(SS_CANTRCVMORE|SS_ISDISCONNECTED); + rso->so_state &= ~(SS_CANTRCVMORE|SS_ISDISCONNECTED); if (fip->fi_readers > 0) wakeup(&fip->fi_readers); } } + sounlock(s); if ((ap->a_mode & O_NONBLOCK) == 0) { if ((ap->a_mode & FREAD) && fip->fi_writers == 0) { VOP_UNLOCK(vp, p); @@ -246,6 +258,7 @@ fifo_write(void *v) if (ap->a_uio->uio_rw != UIO_WRITE) panic("fifo_write mode"); #endif + /* XXXSMP changing state w/o lock isn't safe. */ if (ap->a_ioflag & IO_NDELAY) wso->so_state |= SS_NBIO; VOP_UNLOCK(ap->a_vp, p); Index: nfs/nfs_socket.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_socket.c,v retrieving revision 1.119 diff -u -p -r1.119 nfs_socket.c --- nfs/nfs_socket.c 27 Jun 2017 12:02:43 -0000 1.119 +++ nfs/nfs_socket.c 18 Jul 2017 06:46:18 -0000 @@ -297,16 +297,18 @@ nfs_connect(struct nfsmount *nmp, struct goto bad; } } else { + s = solock(so); error = soconnect(so, nmp->nm_nam); - if (error) + if (error) { + sounlock(s); goto bad; + } /* * Wait for the connection to complete. Cribbed from the * connect system call but with the wait timing out so * that interruptible mounts don't hang here for a long time. */ - s = solock(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { sosleep(so, &so->so_timeo, PSOCK, "nfscon", 2 * hz); if ((so->so_state & SS_ISCONNECTING) &&