The NFS poll(2)/select(2) and kqueue(2) behaviors are inconsistent with each other. The diff below uses the kernel-only NOTE_IMM hint to make the kqueue handlers behave like the current poll handler, i.e. the poller is bypassed.
The new EVFILT_WRITE handler doesn't check for NOTE_IMM because it is unlikely to introduce regression. Is this a preferred approach? Ok? Index: nfs/nfs_kq.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_kq.c,v retrieving revision 1.30 diff -u -p -r1.30 nfs_kq.c --- nfs/nfs_kq.c 7 Apr 2020 13:27:52 -0000 1.30 +++ nfs/nfs_kq.c 31 May 2020 08:43:36 -0000 @@ -50,9 +50,12 @@ #include <nfs/nfs_var.h> void nfs_kqpoll(void *); +int nfs_kqwatch(struct vnode *); +void nfs_kqunwatch(struct vnode *); void filt_nfsdetach(struct knote *); int filt_nfsread(struct knote *, long); +int filt_nfswrite(struct knote *, long); int filt_nfsvnode(struct knote *, long); struct kevq { @@ -182,11 +185,19 @@ void filt_nfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; - struct kevq *ke; klist_remove(&vp->v_selectinfo.si_note, kn); /* Remove the vnode from watch list */ + if ((kn->kn_sfflags & NOTE_IMM) == 0) + nfs_kqunwatch(vp); +} + +void +nfs_kqunwatch(struct vnode *vp) +{ + struct kevq *ke; + rw_enter_write(&nfskevq_lock); SLIST_FOREACH(ke, &kevlist, kev_link) { if (ke->vp == vp) { @@ -238,6 +249,22 @@ filt_nfsread(struct knote *kn, long hint } int +filt_nfswrite(struct knote *kn, long hint) +{ + /* + * filesystem is gone, so set the EOF flag and schedule + * the knote for deletion. 
+ */ + if (hint == NOTE_REVOKE) { + kn->kn_flags |= (EV_EOF | EV_ONESHOT); + return (1); + } + + kn->kn_data = 0; + return (1); +} + +int filt_nfsvnode(struct knote *kn, long hint) { if (kn->kn_sfflags & hint) @@ -256,6 +283,13 @@ static const struct filterops nfsread_fi .f_event = filt_nfsread, }; +const struct filterops nfswrite_filtops = { + .f_flags = FILTEROP_ISFD, + .f_attach = NULL, + .f_detach = filt_nfsdetach, + .f_event = filt_nfswrite, +}; + static const struct filterops nfsvnode_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, @@ -269,10 +303,6 @@ nfs_kqfilter(void *v) struct vop_kqfilter_args *ap = v; struct vnode *vp; struct knote *kn; - struct kevq *ke; - int error = 0; - struct vattr attr; - struct proc *p = curproc; /* XXX */ vp = ap->a_vp; kn = ap->a_kn; @@ -286,6 +316,9 @@ nfs_kqfilter(void *v) case EVFILT_READ: kn->kn_fop = &nfsread_filtops; break; + case EVFILT_WRITE: + kn->kn_fop = &nfswrite_filtops; + break; case EVFILT_VNODE: kn->kn_fop = &nfsvnode_filtops; break; @@ -298,7 +331,27 @@ nfs_kqfilter(void *v) /* * Put the vnode to watched list. */ - + if ((kn->kn_sfflags & NOTE_IMM) == 0) { + int error; + + error = nfs_kqwatch(vp); + if (error) + return (error); + } + + klist_insert(&vp->v_selectinfo.si_note, kn); + + return (0); +} + +int +nfs_kqwatch(struct vnode *vp) +{ + struct proc *p = curproc; /* XXX */ + struct vattr attr; + struct kevq *ke; + int error = 0; + /* * Fetch current attributes. It's only needed when the vnode * is not watched yet, but we need to do this without lock @@ -338,8 +391,6 @@ nfs_kqfilter(void *v) /* kick the poller */ wakeup(pnfskq); - - klist_insert(&vp->v_selectinfo.si_note, kn); out: rw_exit_write(&nfskevq_lock);