NFS poll(2)/select(2) and kqueue(2) behaviors are inconsistent.  The diff
below uses the kernel-only NOTE_IMM hint to make the kqueue handlers
behave like the current poll handler: the poller is bypassed.

The new EVFILT_WRITE handler doesn't check for NOTE_IMM because the
handler is unlikely to introduce a regression.

Is this a preferred approach?  Ok?

Index: nfs/nfs_kq.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_kq.c,v
retrieving revision 1.30
diff -u -p -r1.30 nfs_kq.c
--- nfs/nfs_kq.c        7 Apr 2020 13:27:52 -0000       1.30
+++ nfs/nfs_kq.c        31 May 2020 08:43:36 -0000
@@ -50,9 +50,12 @@
 #include <nfs/nfs_var.h>
 
 void   nfs_kqpoll(void *);
+int    nfs_kqwatch(struct vnode *);
+void   nfs_kqunwatch(struct vnode *);
 
 void   filt_nfsdetach(struct knote *);
 int    filt_nfsread(struct knote *, long);
+int    filt_nfswrite(struct knote *, long);
 int    filt_nfsvnode(struct knote *, long);
 
 struct kevq {
@@ -182,11 +185,19 @@ void
 filt_nfsdetach(struct knote *kn)
 {
        struct vnode *vp = (struct vnode *)kn->kn_hook;
-       struct kevq *ke;
 
        klist_remove(&vp->v_selectinfo.si_note, kn);
 
        /* Remove the vnode from watch list */
+       if ((kn->kn_sfflags & NOTE_IMM) == 0)
+               nfs_kqunwatch(vp);
+}
+
+void
+nfs_kqunwatch(struct vnode *vp)
+{
+       struct kevq *ke;
+
        rw_enter_write(&nfskevq_lock);
        SLIST_FOREACH(ke, &kevlist, kev_link) {
                if (ke->vp == vp) {
@@ -238,6 +249,22 @@ filt_nfsread(struct knote *kn, long hint
 }
 
 int
+filt_nfswrite(struct knote *kn, long hint)
+{
+       /*
+        * filesystem is gone, so set the EOF flag and schedule
+        * the knote for deletion.
+        */
+       if (hint == NOTE_REVOKE) {
+               kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+               return (1);
+       }
+
+       kn->kn_data = 0;
+       return (1);
+}
+
+int
 filt_nfsvnode(struct knote *kn, long hint)
 {
        if (kn->kn_sfflags & hint)
@@ -256,6 +283,13 @@ static const struct filterops nfsread_fi
        .f_event        = filt_nfsread,
 };
 
+const struct filterops nfswrite_filtops = {
+       .f_flags        = FILTEROP_ISFD,
+       .f_attach       = NULL,
+       .f_detach       = filt_nfsdetach,
+       .f_event        = filt_nfswrite,
+};
+
 static const struct filterops nfsvnode_filtops = {
        .f_flags        = FILTEROP_ISFD,
        .f_attach       = NULL,
@@ -269,10 +303,6 @@ nfs_kqfilter(void *v)
        struct vop_kqfilter_args *ap = v;
        struct vnode *vp;
        struct knote *kn;
-       struct kevq *ke;
-       int error = 0;
-       struct vattr attr;
-       struct proc *p = curproc;       /* XXX */
 
        vp = ap->a_vp;
        kn = ap->a_kn;
@@ -286,6 +316,9 @@ nfs_kqfilter(void *v)
        case EVFILT_READ:
                kn->kn_fop = &nfsread_filtops;
                break;
+       case EVFILT_WRITE:
+               kn->kn_fop = &nfswrite_filtops;
+               break;
        case EVFILT_VNODE:
                kn->kn_fop = &nfsvnode_filtops;
                break;
@@ -298,7 +331,27 @@ nfs_kqfilter(void *v)
        /*
         * Put the vnode to watched list.
         */
-       
+       if ((kn->kn_sfflags & NOTE_IMM) == 0) {
+               int error;
+
+               error = nfs_kqwatch(vp);
+               if (error)
+                       return (error);
+       }
+
+       klist_insert(&vp->v_selectinfo.si_note, kn);
+
+       return (0);
+}
+
+int
+nfs_kqwatch(struct vnode *vp)
+{
+       struct proc *p = curproc;       /* XXX */
+       struct vattr attr;
+       struct kevq *ke;
+       int error = 0;
+
        /*
         * Fetch current attributes. It's only needed when the vnode
         * is not watched yet, but we need to do this without lock
@@ -338,8 +391,6 @@ nfs_kqfilter(void *v)
 
        /* kick the poller */
        wakeup(pnfskq);
-
-       klist_insert(&vp->v_selectinfo.si_note, kn);
 
 out:
        rw_exit_write(&nfskevq_lock);

Reply via email to