Ok, this *used* to work, but I have some test reports
now that it will likely panic. so please hold off.

        Sorry about that

        -Bob


* Bob Beck <b...@openbsd.org> [2009-08-09 03:41]:
>       Ok let's try this again:
> 
> > 
> >     I could use some assistance in testing this, particularly on some of 
> > the more odd archetectures.
> > 
> >     This diff makes a bunch of changes to the vfs name cache:
> > 
> > 1) it gets rid of the global hash table and reverse hash table for namecahe 
> > entries. namecache entries
> >    are now allocated and freed in two global LRU's - one for regular, and 
> > one for negative entries. 
> > 
> > 2) Entries are no longer searched in the global lists, instead they are 
> > kept track of in the relevant
> > vnodes. Since each vnode can be either the parent (directory) vnode of a 
> > namecache entry, or the target of
> > the entry, we keep track of it in both ways in the vnode. We now use a rb 
> > tree to search the namecache from
> > a directory vnode, and keep a list of which entries that we are the target 
> > vnode.
> > 
> > 3) (most importantly) namecache entries can now be allocated and freed. 
> > 
> > 4) cache_purge now actually does something rather than depending on vnode 
> > horror to work. when recycling a vnode
> > cache_purge will now correctly clear the name cache entries associated with 
> > the vnode.  (before it basically
> > didn't do anything, and depended on us noticing the vnodes were being 
> > recycled underneath us)
> > 
> >     This has been beat on a bunch, and appears not to slow anything down. I 
> > do require some testing
> > and reports on other arch's particularly the likes of sparc, vax, and 
> > strange things if possible
> > 
> >     Thanks,
> >     -Bob
> 
>       I had this ready to go after c2k9, but with the instability in the
> tree I never committed it. I'd like some people to beat on it again please.
> 
>       This is a first step toward making vnodes something more generic and
> sane where hopefully we can allocate and deallocate them without major
> pain. I really need testing of this. - now updated to apply again.
> 
>       -Bob
> 
> Index: kern/vfs_cache.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_cache.c,v
> retrieving revision 1.30
> diff -u -r1.30 vfs_cache.c
> --- kern/vfs_cache.c  9 Jul 2009 22:29:56 -0000       1.30
> +++ kern/vfs_cache.c  9 Aug 2009 18:30:12 -0000
> @@ -68,26 +68,59 @@
>  /*
>   * Structures associated with name caching.
>   */
> -LIST_HEAD(nchashhead, namecache) *nchashtbl;
>  u_long       nchash;                         /* size of hash table - 1 */
> -long numcache;                       /* number of cache entries allocated */
> -TAILQ_HEAD(, namecache) nclruhead;           /* LRU chain */
> -struct       nchstats nchstats;              /* cache effectiveness 
> statistics */
> -
> -LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
> -u_long  ncvhash;                        /* size of hash table - 1 */
> -
> -#define NCHASH(dvp, cnp) \
> -     hash32_buf(&(dvp)->v_id, sizeof((dvp)->v_id), (cnp)->cn_hash) & nchash
> +long numcache;                       /* total number of cache entries 
> allocated */
> +long numneg;                         /* number of negative cache entries */
>  
> -#define NCVHASH(vp) (vp)->v_id & ncvhash
> +TAILQ_HEAD(, namecache) nclruhead;           /* Regular Entry LRU chain */
> +TAILQ_HEAD(, namecache) nclruneghead;                /* Negative Entry LRU 
> chain */
> +struct       nchstats nchstats;              /* cache effectiveness 
> statistics */
>  
>  int doingcache = 1;                  /* 1 => enable the cache */
>  
>  struct pool nch_pool;
>  
> +void cache_zap(struct namecache *);
>  u_long nextvnodeid;
>  
> +static int
> +namecache_compare(struct namecache *n1, struct namecache *n2)
> +{
> +     if (n1->nc_nlen == n2->nc_nlen)
> +             return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
> +     else
> +             return (n1->nc_nlen - n2->nc_nlen);
> +}
> +
> +RB_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
> +
> +/*
> + * blow away a namecache entry
> + */
> +void
> +cache_zap(struct namecache *ncp)
> +{
> +     struct vnode *dvp = NULL;
> +
> +     if (ncp->nc_vp != NULL) {
> +             TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
> +             numcache--;
> +     } else {
> +             TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
> +             numneg--;
> +     }
> +     if (ncp->nc_dvp) {
> +             RB_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
> +             if (RB_EMPTY(&ncp->nc_dvp->v_nc_tree))
> +                     dvp = ncp->nc_dvp;
> +     }
> +     if (ncp->nc_vp)
> +             TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
> +     pool_put(&nch_pool, ncp);
> +     if (dvp)
> +             vdrop(dvp);
> +}
> +
>  /*
>   * Look for a name in the cache. We don't do this if the segment name is
>   * long, simply so the cache can avoid holding long names (which would
> @@ -107,7 +140,7 @@
>  cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname 
> *cnp)
>  {
>       struct namecache *ncp;
> -     struct nchashhead *ncpp;
> +     struct namecache n;
>       struct vnode *vp;
>       struct proc *p = curproc;
>       u_long vpid;
> @@ -125,14 +158,11 @@
>               return (-1);
>       }
>  
> -     ncpp = &nchashtbl[NCHASH(dvp, cnp)];
> -     LIST_FOREACH(ncp, ncpp, nc_hash) {
> -             if (ncp->nc_dvp == dvp &&
> -                 ncp->nc_dvpid == dvp->v_id &&
> -                 ncp->nc_nlen == cnp->cn_namelen &&
> -                 !memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
> -                     break;
> -     }
> +     /* lookup in directory vnode's redblack tree */
> +     n.nc_nlen = cnp->cn_namelen;
> +     memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
> +     ncp = RB_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);
> +
>       if (ncp == NULL) {
>               nchstats.ncs_miss++;
>               return (-1);
> @@ -145,13 +175,10 @@
>                   (cnp->cn_flags & ISLASTCN) == 0) {
>                       nchstats.ncs_neghits++;
>                       /*
> -                      * Move this slot to end of LRU chain,
> -                      * if not already there.
> +                      * Move this slot to end of the negative LRU chain,
>                        */
> -                     if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
> -                             TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
> -                             TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
> -                     }
> +                     TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
> +                     TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
>                       return (ENOENT);
>               } else {
>                       nchstats.ncs_badhits++;
> @@ -220,12 +247,10 @@
>  
>       nchstats.ncs_goodhits++;
>       /*
> -      * Move this slot to end of LRU chain, if not already there.
> +      * Move this slot to end of the regular LRU chain.
>        */
> -     if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
> -             TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
> -             TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
> -     }
> +     TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
> +     TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
>       *vpp = vp;
>       return (0);
>  
> @@ -235,16 +260,7 @@
>        * the cache entry is invalid, or otherwise don't
>        * want cache entry to exist.
>        */
> -     TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
> -     LIST_REMOVE(ncp, nc_hash);
> -     ncp->nc_hash.le_prev = NULL;
> -
> -     if (ncp->nc_vhash.le_prev != NULL) {
> -             LIST_REMOVE(ncp, nc_vhash);
> -             ncp->nc_vhash.le_prev = NULL;
> -     }
> -
> -     TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
> +     cache_zap(ncp);
>       return (-1);
>  }
>  
> @@ -267,62 +283,51 @@
>  cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char 
> *bufp)
>  {
>       struct namecache *ncp;
> -     struct vnode *dvp;
> -     struct ncvhashhead *nvcpp;
> +     struct vnode *dvp = NULL;
>       char *bp;
>  
>       if (!doingcache)
>               goto out;
> -
> -     nvcpp = &ncvhashtbl[NCVHASH(vp)];
> -
> -     LIST_FOREACH(ncp, nvcpp, nc_vhash) {
> -             if (ncp->nc_vp == vp &&
> -                 ncp->nc_vpid == vp->v_id &&
> -                 (dvp = ncp->nc_dvp) != NULL &&
> -                 /* avoid pesky '.' entries.. */
> -                 dvp != vp && ncp->nc_dvpid == dvp->v_id) {
> -
> +     TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
> +             dvp = ncp->nc_dvp;
> +             if (dvp && dvp != vp)
> +                     goto found;
> +     }
> +     goto miss;
> +found:
>  #ifdef DIAGNOSTIC
> -                     if (ncp->nc_nlen == 1 &&
> -                         ncp->nc_name[0] == '.')
> -                             panic("cache_revlookup: found entry for .");
> -
> -                     if (ncp->nc_nlen == 2 &&
> -                         ncp->nc_name[0] == '.' &&
> -                         ncp->nc_name[1] == '.')
> -                             panic("cache_revlookup: found entry for ..");
> +     if (ncp->nc_nlen == 1 &&
> +         ncp->nc_name[0] == '.')
> +             panic("cache_revlookup: found entry for .");
> +     if (ncp->nc_nlen == 2 &&
> +         ncp->nc_name[0] == '.' &&
> +         ncp->nc_name[1] == '.')
> +             panic("cache_revlookup: found entry for ..");
>  #endif
> -                     nchstats.ncs_revhits++;
> -
> -                     if (bufp != NULL) {
> -                             bp = *bpp;
> -                             bp -= ncp->nc_nlen;
> -                             if (bp <= bufp) {
> -                                     *dvpp = NULL;
> -                                     return (ERANGE);
> -                             }
> -                             memcpy(bp, ncp->nc_name, ncp->nc_nlen);
> -                             *bpp = bp;
> -                     }
> +     nchstats.ncs_revhits++;
>  
> -                     *dvpp = dvp;
> -
> -                     /*
> -                      * XXX: Should we vget() here to have more
> -                      * consistent semantics with cache_lookup()?
> -                      *
> -                      * For MP safety it might be necessary to do
> -                      * this here, while also protecting hash
> -                      * tables themselves to provide some sort of
> -                      * sane inter locking.
> -                      */
> -                     return (0);
> +     if (bufp != NULL) {
> +             bp = *bpp;
> +             bp -= ncp->nc_nlen;
> +             if (bp <= bufp) {
> +                     *dvpp = NULL;
> +                     return (ERANGE);
>               }
> +             memcpy(bp, ncp->nc_name, ncp->nc_nlen);
> +             *bpp = bp;
>       }
> -     nchstats.ncs_revmiss++;
>  
> - out:
> +     *dvpp = dvp;
> +
> +     /*
> +      * XXX: Should we vget() here to have more
> +      * consistent semantics with cache_lookup()?
> +      */
> +     return (0);
> +
> +miss:
> +     nchstats.ncs_revmiss++;
> +out:
>       *dvpp = NULL;
>       return (-1);
>  }
> @@ -334,57 +339,49 @@
>  cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
>  {
>       struct namecache *ncp;
> -     struct nchashhead *ncpp;
> -     struct ncvhashhead *nvcpp;
>  
>       if (!doingcache || cnp->cn_namelen > NCHNAMLEN)
>               return;
>  
>       /*
> -      * Free the cache slot at head of lru chain.
> +      * allocate, or recycle (free and allocate) an ncp.
>        */
> -     if (numcache < desiredvnodes) {
> -             ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
> -             numcache++;
> -     } else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) {
> -             TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
> -             if (ncp->nc_hash.le_prev != NULL) {
> -                     LIST_REMOVE(ncp, nc_hash);
> -                     ncp->nc_hash.le_prev = NULL;
> -             }
> -             if (ncp->nc_vhash.le_prev != NULL) {
> -                     LIST_REMOVE(ncp, nc_vhash);
> -                     ncp->nc_vhash.le_prev = NULL;
> -             }
> -     } else
> -             return;
> +     if (numcache >= desiredvnodes) {
> +             if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
> +                     cache_zap(ncp);
> +             else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
> +                     cache_zap(ncp);
> +             else
> +                     panic("wtf? leak?");
> +     }
> +     ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
> +
>       /* grab the vnode we just found */
>       ncp->nc_vp = vp;
>       if (vp)
>               ncp->nc_vpid = vp->v_id;
> +
>       /* fill in cache info */
>       ncp->nc_dvp = dvp;
>       ncp->nc_dvpid = dvp->v_id;
>       ncp->nc_nlen = cnp->cn_namelen;
>       bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
> -     TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
> -     ncpp = &nchashtbl[NCHASH(dvp, cnp)];
> -     LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
> -
> -     /*
> -      * Create reverse-cache entries (used in getcwd) for
> -      * directories.
> -      */
> -
> -     ncp->nc_vhash.le_prev = NULL;
> -     ncp->nc_vhash.le_next = NULL;
> -
> -     if (vp && vp != dvp && vp->v_type == VDIR &&
> -         (ncp->nc_nlen > 2 ||
> -             (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
> -             (ncp->nc_nlen > 0 && ncp->nc_name[0] != '.'))) {
> -             nvcpp = &ncvhashtbl[NCVHASH(vp)];
> -             LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
> +     if (RB_EMPTY(&dvp->v_nc_tree)) {
> +             vhold(dvp);
> +     }
> +     if (RB_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp) != NULL)
> +             panic("Ich habe eine Rotweinflasche in meinem Arsch\n");
> +     if (vp) {
> +             TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
> +             numcache++;
> +             TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp, nc_me);
> +     } else {
> +             TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
> +             numneg++;
> +     }
> +     if (numneg  > desiredvnodes) {
> +             if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
> +                     cache_zap(ncp);
>       }
>  }
>  
> @@ -394,10 +391,8 @@
>  void
>  nchinit()
>  {
> -
>       TAILQ_INIT(&nclruhead);
> -     nchashtbl = hashinit(desiredvnodes, M_CACHE, M_WAITOK, &nchash);
> -     ncvhashtbl = hashinit(desiredvnodes/8, M_CACHE, M_WAITOK, &ncvhash);
> +     TAILQ_INIT(&nclruneghead);
>       pool_init(&nch_pool, sizeof(struct namecache), 0, 0, 0, "nchpl",
>           &pool_allocator_nointr);
>  }
> @@ -410,18 +405,16 @@
>  cache_purge(struct vnode *vp)
>  {
>       struct namecache *ncp;
> -     struct nchashhead *ncpp;
>  
> +     while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
> +             cache_zap(ncp);
> +     while ((ncp = RB_ROOT(&vp->v_nc_tree)))
> +             cache_zap(ncp);
> +
> +     /* XXX this blows goats */
>       vp->v_id = ++nextvnodeid;
> -     if (nextvnodeid != 0)
> -             return;
> -     for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
> -             LIST_FOREACH(ncp, ncpp, nc_hash) {
> -                     ncp->nc_vpid = 0;
> -                     ncp->nc_dvpid = 0;
> -             }
> -     }
> -     vp->v_id = ++nextvnodeid;
> +     if (vp->v_id == 0)
> +             vp->v_id = ++nextvnodeid;
>  }
>  
>  /*
> @@ -437,6 +430,7 @@
>  {
>       struct namecache *ncp, *nxtcp;
>  
> +     /* whack the regular entries */
>       for (ncp = TAILQ_FIRST(&nclruhead); ncp != TAILQ_END(&nclruhead);
>           ncp = nxtcp) {
>               if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
> @@ -444,19 +438,20 @@
>                       continue;
>               }
>               /* free the resources we had */
> -             ncp->nc_vp = NULL;
> -             ncp->nc_dvp = NULL;
> -             TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
> -             if (ncp->nc_hash.le_prev != NULL) {
> -                     LIST_REMOVE(ncp, nc_hash);
> -                     ncp->nc_hash.le_prev = NULL;
> -             }
> -             if (ncp->nc_vhash.le_prev != NULL) {
> -                     LIST_REMOVE(ncp, nc_vhash);
> -                     ncp->nc_vhash.le_prev = NULL;
> -             }
> +             cache_zap(ncp);
>               /* cause rescan of list, it may have altered */
>               nxtcp = TAILQ_FIRST(&nclruhead);
> -             TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
> +     }
> +     /* whack the negative entries */
> +     for (ncp = TAILQ_FIRST(&nclruneghead); ncp != TAILQ_END(&nclruneghead);
> +         ncp = nxtcp) {
> +             if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
> +                     nxtcp = TAILQ_NEXT(ncp, nc_neg);
> +                     continue;
> +             }
> +             /* free the resources we had */
> +             cache_zap(ncp);
> +             /* cause rescan of list, it may have altered */
> +             nxtcp = TAILQ_FIRST(&nclruneghead);
>       }
>  }
> Index: kern/vfs_subr.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_subr.c,v
> retrieving revision 1.180
> diff -u -r1.180 vfs_subr.c
> --- kern/vfs_subr.c   2 Aug 2009 16:28:40 -0000       1.180
> +++ kern/vfs_subr.c   9 Aug 2009 18:30:12 -0000
> @@ -360,6 +360,8 @@
>               splx(s);
>               vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
>               RB_INIT(&vp->v_bufs_tree);
> +             RB_INIT(&vp->v_nc_tree);
> +             TAILQ_INIT(&vp->v_cache_dst);
>               numvnodes++;
>       } else {
>               for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
> Index: sys/namei.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/namei.h,v
> retrieving revision 1.22
> diff -u -r1.22 namei.h
> --- sys/namei.h       29 Aug 2008 08:57:28 -0000      1.22
> +++ sys/namei.h       9 Aug 2009 18:30:12 -0000
> @@ -36,6 +36,11 @@
>  #define      _SYS_NAMEI_H_
>  
>  #include <sys/queue.h>
> +#include <sys/tree.h>
> +
> +struct namecache;
> +struct namecache_rb_cache;
> +RB_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
>  
>  /*
>   * Encapsulation of namei parameters.
> @@ -156,9 +161,10 @@
>  #define      NCHNAMLEN       31      /* maximum name segment length we 
> bother with */
>  
>  struct       namecache {
> -     LIST_ENTRY(namecache) nc_hash;  /* hash chain */
> -     LIST_ENTRY(namecache) nc_vhash; /* (reverse) directory hash chain */
> -     TAILQ_ENTRY(namecache) nc_lru;  /* LRU chain */
> +     TAILQ_ENTRY(namecache) nc_lru;  /* Regular Entry LRU chain */
> +     TAILQ_ENTRY(namecache) nc_neg;  /* Negative Entry LRU chain */
> +     RB_ENTRY(namecache) n_rbcache;  /* Namecache rb tree from vnode */
> +     TAILQ_ENTRY(namecache) nc_me;   /* ncp's referring to me */
>       struct  vnode *nc_dvp;          /* vnode of parent of name */
>       u_long  nc_dvpid;               /* capability number of nc_dvp */
>       struct  vnode *nc_vp;           /* vnode the name refers to */
> Index: sys/vnode.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/vnode.h,v
> retrieving revision 1.102
> diff -u -r1.102 vnode.h
> --- sys/vnode.h       2 Aug 2009 16:28:40 -0000       1.102
> +++ sys/vnode.h       9 Aug 2009 18:30:12 -0000
> @@ -36,6 +36,7 @@
>  #include <sys/types.h>
>  #include <sys/queue.h>
>  #include <sys/lock.h>
> +#include <sys/namei.h>
>  #include <sys/selinfo.h>
>  #include <sys/tree.h>
>  
> @@ -82,6 +83,7 @@
>  LIST_HEAD(buflists, buf);
>  
>  RB_HEAD(buf_rb_bufs, buf);
> +RB_HEAD(namecache_rb_cache, namecache);
>  
>  struct vnode {
>       struct uvm_vnode v_uvm;                 /* uvm data */
> @@ -110,6 +112,10 @@
>               struct fifoinfo *vu_fifoinfo;   /* fifo (VFIFO) */
>       } v_un;
>  
> +     /* VFS namecache */
> +     struct namecache_rb_cache v_nc_tree;
> +     TAILQ_HEAD(, namecache) v_cache_dst;     /* cache entries to us */
> +
>       enum    vtagtype v_tag;                 /* type of underlying data */
>       void    *v_data;                        /* private data for fs */
>       struct  selinfo v_selectinfo;           /* identity of poller(s) */
> @@ -247,8 +253,9 @@
>  extern       int maxvnodes;                  /* XXX number of vnodes to 
> allocate */
>  extern       time_t syncdelay;               /* time to delay syncing vnodes 
> */
>  extern       int rushjob;                    /* # of slots syncer should run 
> ASAP */
> +extern void    vhold(struct vnode *);
> +extern void    vdrop(struct vnode *);
>  #endif /* _KERNEL */
> -
>  
>  /*
>   * Mods for exensibility.
> 

-- 
#!/usr/bin/perl
if ((not 0 && not 1) !=  (! 0 && ! 1)) {
   print "Larry and Tom must smoke some really primo stuff...\n"; 
}

Reply via email to