UVM vnode objects include a reference count to keep track of the number of processes that have the corresponding pages mapped in their VM space.
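To make the rest of this mail easier to follow, here is a small userland model of how this reference counting currently interacts with the UVM_VNODE_CANPERSIST persistence described below. This is only a sketch of my reading of uvm_vnode.c, not the kernel code: locking, page flushing and error handling are omitted, and "vnode_refs" stands in for the vnode reference taken with vref() and dropped with vrele().

#include <assert.h>
#include <stdio.h>

#define UVM_VNODE_VALID		0x001	/* attached to the vnode */
#define UVM_VNODE_CANPERSIST	0x002	/* may persist after ref == 0 */

struct uvn_model {
	int	u_flags;
	int	uo_refs;	/* stands in for u_obj.uo_refs */
	int	vnode_refs;	/* stands in for the vnode's reference */
};

/* simplified uvn_attach(): called for each new mapping of the vnode */
static void
model_attach(struct uvn_model *uvn)
{
	if (uvn->u_flags & UVM_VNODE_VALID) {
		/* regain the vref if the uvn was persisting */
		if (uvn->uo_refs == 0)
			uvn->vnode_refs++;	/* vref(vp) */
	} else {
		uvn->u_flags = UVM_VNODE_VALID | UVM_VNODE_CANPERSIST;
		uvn->vnode_refs++;		/* vref(vp) */
	}
	uvn->uo_refs++;
}

/* simplified uvn_detach(): called when a mapping goes away */
static void
model_detach(struct uvn_model *uvn)
{
	if (--uvn->uo_refs > 0)
		return;
	if ((uvn->u_flags & UVM_VNODE_CANPERSIST) == 0)
		uvn->u_flags = 0;	/* a goner: full teardown */
	/* either way the vnode reference is dropped */
	uvn->vnode_refs--;		/* vrele(vp) */
}

int
main(void)
{
	struct uvn_model uvn = { 0, 0, 0 };

	model_attach(&uvn);
	model_detach(&uvn);

	/*
	 * The persisting state this mail is about: the object is still
	 * VALID and its pages stay around, but both reference counts
	 * are zero, so nobody holds the vnode for the page daemon.
	 */
	assert(uvn.u_flags & UVM_VNODE_VALID);
	assert(uvn.uo_refs == 0 && uvn.vnode_refs == 0);
	printf("persisting: flags=0x%x uo_refs=%d vnode_refs=%d\n",
	    (unsigned int)uvn.u_flags, uvn.uo_refs, uvn.vnode_refs);

	/* a new mapping revives the object and must regain the vref */
	model_attach(&uvn);
	assert(uvn.uo_refs == 1 && uvn.vnode_refs == 1);

	return 0;
}

The state reached after the first detach above, a VALID object with both counters at zero, is exactly the situation the page daemon later trips over.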
When the last process referencing a given library or executable dies, the reaper will munmap this object on its behalf. When this happens it doesn't free the associated pages, in order to speed up possible reuse of the file. Instead the pages are placed on the inactive list, but stay ready to be pmap_enter()'d without requiring I/O as soon as a new process needs to access them.

This mechanism for keeping pages populated, known as UVM_VNODE_CANPERSIST, doesn't work well with swapping [0]. When the page daemon wants to free pages on the inactive list, it tries to flush them to disk and ends up calling panic(9), because it needs a valid reference to the vnode to do so. This indicates that the mechanism described above, which seems to work fine for read-only mappings, is currently buggy in more complex situations. Flushing the pages when the last reference to the UVM object is dropped also doesn't seem to be enough, as bluhm@ reported [1].

The diff below, which has already been committed and reverted, gets rid of the UVM_VNODE_CANPERSIST logic. I'd like to commit it again now that the arm64 caching bug has been found and fixed.

Getting rid of this logic means more I/O will be generated and pages might have a faster reuse cycle. I'm aware this might introduce a small slowdown. However, I believe we should work towards loading files from the buffer cache, to save I/O cycles, instead of keeping another layer of cache. Such work isn't trivial, and making sure the vnode <-> UVM relation is simple and well understood is the first step in this direction.

I'd appreciate it if the diff below could be tested on many architectures, including the offending rpi4.

Comments? Oks?

[0] https://marc.info/?l=openbsd-bugs&m=164846737707559&w=2
[1] https://marc.info/?l=openbsd-bugs&m=166843373415030&w=2

Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.130
diff -u -p -r1.130 uvm_vnode.c
--- uvm/uvm_vnode.c	20 Oct 2022 13:31:52 -0000	1.130
+++ uvm/uvm_vnode.c	15 Nov 2022 13:28:28 -0000
@@ -161,11 +161,8 @@ uvn_attach(struct vnode *vp, vm_prot_t a
 	 * add it to the writeable list, and then return.
 	 */
 	if (uvn->u_flags & UVM_VNODE_VALID) {	/* already active? */
+		KASSERT(uvn->u_obj.uo_refs > 0);
 
-		/* regain vref if we were persisting */
-		if (uvn->u_obj.uo_refs == 0) {
-			vref(vp);
-		}
 		uvn->u_obj.uo_refs++;	/* bump uvn ref! */
 
 		/* check for new writeable uvn */
@@ -235,14 +232,14 @@ uvn_attach(struct vnode *vp, vm_prot_t a
 	KASSERT(uvn->u_obj.uo_refs == 0);
 	uvn->u_obj.uo_refs++;
 	oldflags = uvn->u_flags;
-	uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
+	uvn->u_flags = UVM_VNODE_VALID;
 	uvn->u_nio = 0;
 	uvn->u_size = used_vnode_size;
 
 	/*
 	 * add a reference to the vnode.  this reference will stay as long
 	 * as there is a valid mapping of the vnode.  dropped when the
-	 * reference count goes to zero [and we either free or persist].
+	 * reference count goes to zero.
 	 */
 	vref(vp);
 
@@ -323,16 +320,6 @@ uvn_detach(struct uvm_object *uobj)
 	 */
 	vp->v_flag &= ~VTEXT;
 
-	/*
-	 * we just dropped the last reference to the uvn.  see if we can
-	 * let it "stick around".
-	 */
-	if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
-		/* won't block */
-		uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
-		goto out;
-	}
-
 	/* its a goner! */
 	uvn->u_flags |= UVM_VNODE_DYING;
 
@@ -382,7 +369,6 @@ uvn_detach(struct uvm_object *uobj)
 	/* wake up any sleepers */
 	if (oldflags & UVM_VNODE_WANTED)
 		wakeup(uvn);
-out:
 	rw_exit(uobj->vmobjlock);
 
 	/* drop our reference to the vnode. */
@@ -498,8 +484,8 @@ uvm_vnp_terminate(struct vnode *vp)
 	}
 
 	/*
-	 * done.  now we free the uvn if its reference count is zero
-	 * (true if we are zapping a persisting uvn).  however, if we are
+	 * done.  now we free the uvn if its reference count is zero.
+	 * however, if we are
 	 * terminating a uvn with active mappings we let it live ... future
 	 * calls down to the vnode layer will fail.
 	 */
@@ -507,14 +493,14 @@ uvm_vnp_terminate(struct vnode *vp)
 	if (uvn->u_obj.uo_refs) {
 		/*
 		 * uvn must live on it is dead-vnode state until all references
-		 * are gone.  restore flags.  clear CANPERSIST state.
+		 * are gone.  restore flags.
 		 */
 		uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
-		    UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
+		    UVM_VNODE_WANTED);
 	} else {
 		/*
 		 * free the uvn now.  note that the vref reference is already
-		 * gone [it is dropped when we enter the persist state].
+		 * gone.
		 */
 		if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
 			panic("uvm_vnp_terminate: io sync wanted bit set");
@@ -1343,6 +1329,7 @@ uvm_vnp_uncache(struct vnode *vp)
 {
 	struct uvm_vnode *uvn = vp->v_uvm;
 	struct uvm_object *uobj = &uvn->u_obj;
+	int refs;
 
 	/* lock uvn part of the vnode and check if we need to do anything */
@@ -1354,47 +1341,12 @@ uvm_vnp_uncache(struct vnode *vp)
 	}
 
 	/*
-	 * we have a valid, non-blocked uvn.  clear persist flag.
+	 * we have a valid, non-blocked uvn.
 	 * if uvn is currently active we can return now.
 	 */
-	uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
-	if (uvn->u_obj.uo_refs) {
-		rw_exit(uobj->vmobjlock);
-		return FALSE;
-	}
-
-	/*
-	 * uvn is currently persisting!  we have to gain a reference to
-	 * it so that we can call uvn_detach to kill the uvn.
-	 */
-	vref(vp);			/* seems ok, even with VOP_LOCK */
-	uvn->u_obj.uo_refs++;		/* value is now 1 */
+	refs = uvn->u_obj.uo_refs;
 	rw_exit(uobj->vmobjlock);
-
-#ifdef VFSLCKDEBUG
-	/*
-	 * carry over sanity check from old vnode pager: the vnode should
-	 * be VOP_LOCK'd, and we confirm it here.
-	 */
-	if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
-		panic("uvm_vnp_uncache: vnode not locked!");
-#endif
-
-	/*
-	 * now drop our reference to the vnode.  if we have the sole
-	 * reference to the vnode then this will cause it to die [as we
-	 * just cleared the persist flag].  we have to unlock the vnode
-	 * while we are doing this as it may trigger I/O.
-	 *
-	 * XXX: it might be possible for uvn to get reclaimed while we are
-	 * unlocked causing us to return TRUE when we should not.  we ignore
-	 * this as a false-positive return value doesn't hurt us.
-	 */
-	VOP_UNLOCK(vp);
-	uvn_detach(&uvn->u_obj);
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
-	return TRUE;
+	return refs > 0 ? FALSE : TRUE;
 }
 
 /*
@@ -1488,11 +1440,9 @@ uvm_vnp_sync(struct mount *mp)
 			continue;
 
 		/*
-		 * gain reference.  watch out for persisting uvns (need to
-		 * regain vnode REF).
+		 * gain reference.
 		 */
-		if (uvn->u_obj.uo_refs == 0)
-			vref(vp);
+		KASSERT(uvn->u_obj.uo_refs > 0);
 		uvn->u_obj.uo_refs++;
 		SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);

Index: uvm/uvm_vnode.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v
retrieving revision 1.21
diff -u -p -r1.21 uvm_vnode.h
--- uvm/uvm_vnode.h	20 Oct 2022 13:31:52 -0000	1.21
+++ uvm/uvm_vnode.h	15 Nov 2022 12:58:38 -0000
@@ -71,7 +71,6 @@ struct uvm_vnode {
 /*
  * u_flags values
 */
 #define UVM_VNODE_VALID		0x001	/* we are attached to the vnode */
-#define UVM_VNODE_CANPERSIST	0x002	/* we can persist after ref == 0 */
 #define UVM_VNODE_ALOCK		0x004	/* uvn_attach is locked out */
 #define UVM_VNODE_DYING		0x008	/* final detach/terminate in progress */
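To summarize the intended end state for reviewers: with UVM_VNODE_CANPERSIST gone, the model earlier in this mail collapses to plain reference counting. Again only a sketch (reusing struct uvn_model from above), not the kernel code:

/* simplified post-diff uvn_detach(): no persist state left */
static void
model_detach_postdiff(struct uvn_model *uvn)
{
	if (--uvn->uo_refs > 0)
		return;
	/* the last unref always tears the uvn down */
	uvn->u_flags = 0;
	uvn->vnode_refs--;	/* vrele(vp) */
}

A valid uvn then always holds its vref, which is what the new KASSERT(uvn->u_obj.uo_refs > 0) in uvn_attach() and uvm_vnp_sync() enforces.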