On Tue, Nov 15 2022, Martin Pieuchot <m...@openbsd.org> wrote:
> UVM vnode objects include a reference count to keep track of the number
> of processes that have the corresponding pages mapped in their VM space.
>
> When the last process referencing a given library or executable dies,
> the reaper will munmap this object on its behalf.  When this happens it
> doesn't free the associated pages to speed-up possible re-use of the
> file.  Instead the pages are placed on the inactive list but stay ready
> to be pmap_enter()'d without requiring I/O as soon as a new process
> needs to access them.
>
> The mechanism to keep pages populated, known as UVM_VNODE_CANPERSIST,
> doesn't work well with swapping [0].  For some reason when the page daemon
> wants to free pages on the inactive list, it tries to flush the pages to
> disk and calls panic(9) because it needs a valid reference to the vnode to
> do so.
>
> This indicates that the mechanism described above, which seems to work
> fine for RO mappings, is currently buggy in more complex situations.
> Flushing the pages when the last reference of the UVM object is dropped
> also doesn't seem to be enough as bluhm@ reported [1].
>
> The diff below, which has already been committed and reverted, gets rid of
> the UVM_VNODE_CANPERSIST logic.  I'd like to commit it again now that
> the arm64 caching bug has been found and fixed.
>
> Getting rid of this logic means more I/O will be generated and pages
> might have a faster reuse cycle.  I'm aware this might introduce a small
> slowdown,

Numbers for my usual make -j4 in libc,
on an Unmatched riscv64 box, now:
   16m32.65s real    21m36.79s user    30m53.45s system
   16m32.37s real    21m33.40s user    31m17.98s system
   16m32.63s real    21m35.74s user    31m12.01s system
   16m32.13s real    21m36.12s user    31m06.92s system
After:
   19m14.15s real    21m09.39s user    36m51.33s system
   19m19.11s real    21m02.61s user    36m58.46s system
   19m21.77s real    21m09.23s user    37m03.85s system
   19m09.39s real    21m08.96s user    36m36.00s system

4 cores amd64 VM, before (-current plus an other diff):
   1m54.31s real     2m47.36s user     4m24.70s system
   1m52.64s real     2m45.68s user     4m23.46s system
   1m53.47s real     2m43.59s user     4m27.60s system
After:
   2m34.12s real     2m51.15s user     6m20.91s system
   2m34.30s real     2m48.48s user     6m23.34s system
   2m37.07s real     2m49.60s user     6m31.53s system

> however I believe we should work towards loading files from the
> buffer cache to save I/O cycles instead of having another layer of cache.
> Such work isn't trivial and making sure the vnode <-> UVM relation is
> simple and well understood is the first step in this direction.
>
> I'd appreciate if the diff below could be tested on many architectures,
> including the offending rpi4.

Mike has already tested a make build on a riscv64 Unmatched.  I have
also run regress in sys, lib/libc and lib/libpthread on that arch.  As
far as I can see this looks stable on my machine, but what I really care
about is the riscv64 bulk build cluster (I'm going to start another
bulk build soon).

> Comments?  Oks?

The performance drop in my microbenchmark kinda worries me but it's only
a microbenchmark...

> [0] https://marc.info/?l=openbsd-bugs&m=164846737707559&w=2 
> [1] https://marc.info/?l=openbsd-bugs&m=166843373415030&w=2
>
> Index: uvm/uvm_vnode.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
> retrieving revision 1.130
> diff -u -p -r1.130 uvm_vnode.c
> --- uvm/uvm_vnode.c   20 Oct 2022 13:31:52 -0000      1.130
> +++ uvm/uvm_vnode.c   15 Nov 2022 13:28:28 -0000
> @@ -161,11 +161,8 @@ uvn_attach(struct vnode *vp, vm_prot_t a
>        * add it to the writeable list, and then return.
>        */
>       if (uvn->u_flags & UVM_VNODE_VALID) {   /* already active? */
> +             KASSERT(uvn->u_obj.uo_refs > 0);
>  
> -             /* regain vref if we were persisting */
> -             if (uvn->u_obj.uo_refs == 0) {
> -                     vref(vp);
> -             }
>               uvn->u_obj.uo_refs++;           /* bump uvn ref! */
>  
>               /* check for new writeable uvn */
> @@ -235,14 +232,14 @@ uvn_attach(struct vnode *vp, vm_prot_t a
>       KASSERT(uvn->u_obj.uo_refs == 0);
>       uvn->u_obj.uo_refs++;
>       oldflags = uvn->u_flags;
> -     uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
> +     uvn->u_flags = UVM_VNODE_VALID;
>       uvn->u_nio = 0;
>       uvn->u_size = used_vnode_size;
>  
>       /*
>        * add a reference to the vnode.   this reference will stay as long
>        * as there is a valid mapping of the vnode.   dropped when the
> -      * reference count goes to zero [and we either free or persist].
> +      * reference count goes to zero.
>        */
>       vref(vp);
>  
> @@ -323,16 +320,6 @@ uvn_detach(struct uvm_object *uobj)
>        */
>       vp->v_flag &= ~VTEXT;
>  
> -     /*
> -      * we just dropped the last reference to the uvn.   see if we can
> -      * let it "stick around".
> -      */
> -     if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
> -             /* won't block */
> -             uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
> -             goto out;
> -     }
> -
>       /* its a goner! */
>       uvn->u_flags |= UVM_VNODE_DYING;
>  
> @@ -382,7 +369,6 @@ uvn_detach(struct uvm_object *uobj)
>       /* wake up any sleepers */
>       if (oldflags & UVM_VNODE_WANTED)
>               wakeup(uvn);
> -out:
>       rw_exit(uobj->vmobjlock);
>  
>       /* drop our reference to the vnode. */
> @@ -498,8 +484,8 @@ uvm_vnp_terminate(struct vnode *vp)
>       }
>  
>       /*
> -      * done.   now we free the uvn if its reference count is zero
> -      * (true if we are zapping a persisting uvn).   however, if we are
> +      * done.   now we free the uvn if its reference count is zero.
> +      * however, if we are
>        * terminating a uvn with active mappings we let it live ... future
>        * calls down to the vnode layer will fail.
>        */
> @@ -507,14 +493,14 @@ uvm_vnp_terminate(struct vnode *vp)
>       if (uvn->u_obj.uo_refs) {
>               /*
>                * uvn must live on it is dead-vnode state until all references
> -              * are gone.   restore flags.    clear CANPERSIST state.
> +              * are gone.   restore flags.
>                */
>               uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
> -                   UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
> +                   UVM_VNODE_WANTED);
>       } else {
>               /*
>                * free the uvn now.   note that the vref reference is already
> -              * gone [it is dropped when we enter the persist state].
> +              * gone.
>                */
>               if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
>                       panic("uvm_vnp_terminate: io sync wanted bit set");
> @@ -1343,6 +1329,7 @@ uvm_vnp_uncache(struct vnode *vp)
>  {
>       struct uvm_vnode *uvn = vp->v_uvm;
>       struct uvm_object *uobj = &uvn->u_obj;
> +     int refs;
>  
>       /* lock uvn part of the vnode and check if we need to do anything */
>  
> @@ -1354,47 +1341,12 @@ uvm_vnp_uncache(struct vnode *vp)
>       }
>  
>       /*
> -      * we have a valid, non-blocked uvn.   clear persist flag.
> +      * we have a valid, non-blocked uvn.
>        * if uvn is currently active we can return now.
>        */
> -     uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
> -     if (uvn->u_obj.uo_refs) {
> -             rw_exit(uobj->vmobjlock);
> -             return FALSE;
> -     }
> -
> -     /*
> -      * uvn is currently persisting!   we have to gain a reference to
> -      * it so that we can call uvn_detach to kill the uvn.
> -      */
> -     vref(vp);                       /* seems ok, even with VOP_LOCK */
> -     uvn->u_obj.uo_refs++;           /* value is now 1 */
> +     refs = uvn->u_obj.uo_refs;
>       rw_exit(uobj->vmobjlock);
> -
> -#ifdef VFSLCKDEBUG
> -     /*
> -      * carry over sanity check from old vnode pager: the vnode should
> -      * be VOP_LOCK'd, and we confirm it here.
> -      */
> -     if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
> -             panic("uvm_vnp_uncache: vnode not locked!");
> -#endif
> -
> -     /*
> -      * now drop our reference to the vnode.   if we have the sole
> -      * reference to the vnode then this will cause it to die [as we
> -      * just cleared the persist flag].   we have to unlock the vnode
> -      * while we are doing this as it may trigger I/O.
> -      *
> -      * XXX: it might be possible for uvn to get reclaimed while we are
> -      * unlocked causing us to return TRUE when we should not.   we ignore
> -      * this as a false-positive return value doesn't hurt us.
> -      */
> -     VOP_UNLOCK(vp);
> -     uvn_detach(&uvn->u_obj);
> -     vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
> -
> -     return TRUE;
> +     return refs > 0 ? FALSE : TRUE;
>  }
>  
>  /*
> @@ -1488,11 +1440,9 @@ uvm_vnp_sync(struct mount *mp)
>                       continue;
>  
>               /*
> -              * gain reference.   watch out for persisting uvns (need to
> -              * regain vnode REF).
> +              * gain reference.
>                */
> -             if (uvn->u_obj.uo_refs == 0)
> -                     vref(vp);
> +             KASSERT(uvn->u_obj.uo_refs > 0);
>               uvn->u_obj.uo_refs++;
>  
>               SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);
> Index: uvm/uvm_vnode.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v
> retrieving revision 1.21
> diff -u -p -r1.21 uvm_vnode.h
> --- uvm/uvm_vnode.h   20 Oct 2022 13:31:52 -0000      1.21
> +++ uvm/uvm_vnode.h   15 Nov 2022 12:58:38 -0000
> @@ -71,7 +71,6 @@ struct uvm_vnode {
>   * u_flags values
>   */
>  #define UVM_VNODE_VALID              0x001   /* we are attached to the vnode 
> */
> -#define UVM_VNODE_CANPERSIST 0x002   /* we can persist after ref == 0 */
>  #define UVM_VNODE_ALOCK              0x004   /* uvn_attach is locked out */
>  #define UVM_VNODE_DYING              0x008   /* final detach/terminate in 
>                                          progress */
>

-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE

Reply via email to