I could use some assistance in testing this, particularly on some of 
the more odd archetectures.

        This diff makes a bunch of changes to the vfs name cache:

1) it gets rid of the global hash table and reverse hash table for namecahe 
entries. namecache entries
   are now allocated and freed in two global LRU's - one for regular, and one 
for negative entries. 

2) Entries are no longer searched in the global lists, instead they are kept 
track of in the relevant
vnodes. Since each vnode can be either the parent (directory) vnode of a 
namecache entry, or the target of
the entry, we keep track of it in both ways in the vnode. We now use a rb tree 
to search the namecache from
a directory vnode, and keep a list of which entries that we are the target 
vnode.

3) (most importantly) namecache entries can now be allocated and freed. 

4) cache_purge now actually does something rather than depending on vnode 
horror to work. when recycling a vnode
cache_purge will now correctly clear the name cache entries associated with the 
vnode.  (before it basically
didn't do anything, and depended on us noticing the vnodes were being recycled 
underneath us)

        This has been beat on a bunch, and appears not to slow anything down. I 
do require some testing
and reports on other arch's particularly the likes of sparc, vax, and strange 
things if possible

        Thanks,
        -Bob

Index: kern/vfs_cache.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_cache.c,v
retrieving revision 1.29
diff -u -p kern/vfs_cache.c
--- kern/vfs_cache.c    24 Oct 2008 00:22:57 -0000      1.29
+++ kern/vfs_cache.c    11 Jun 2009 21:32:21 -0000
@@ -68,27 +68,60 @@
 /*
  * Structures associated with name caching.
  */
-LIST_HEAD(nchashhead, namecache) *nchashtbl;
 u_long nchash;                         /* size of hash table - 1 */
-long   numcache;                       /* number of cache entries allocated */
-TAILQ_HEAD(, namecache) nclruhead;             /* LRU chain */
+long   numcache;                       /* total number of cache entries 
allocated */
+long   numneg;                         /* number of negative cache entries */
+
+TAILQ_HEAD(, namecache) nclruhead;             /* Regular Entry LRU chain */
+TAILQ_HEAD(, namecache) nclruneghead;          /* Negative Entry LRU chain */
 struct nchstats nchstats;              /* cache effectiveness statistics */
 
-LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
-u_long  ncvhash;                        /* size of hash table - 1 */
-
-#define NCHASH(dvp, cnp) \
-       hash32_buf(&(dvp)->v_id, sizeof((dvp)->v_id), (cnp)->cn_hash) & nchash
-
-#define NCVHASH(vp) (vp)->v_id & ncvhash
-
 int doingcache = 1;                    /* 1 => enable the cache */
 
 struct pool nch_pool;
 
+void cache_zap(struct namecache *);
 u_long nextvnodeid;
 
+static int
+namecache_compare(struct namecache *n1, struct namecache *n2)
+{
+       if (n1->nc_nlen == n2->nc_nlen)
+               return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
+       else
+               return (n1->nc_nlen - n2->nc_nlen);
+}
+
+RB_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
+
 /*
+ * blow away a namecache entry
+ */
+void
+cache_zap(struct namecache *ncp)
+{
+       struct vnode *dvp = NULL;
+
+       if (ncp->nc_vp != NULL) {
+               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+               numcache--;
+       } else {
+               TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
+               numneg--;
+       }
+       if (ncp->nc_dvp) {
+               RB_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
+               if (RB_EMPTY(&ncp->nc_dvp->v_nc_tree))
+                       dvp = ncp->nc_dvp;
+       }
+       if (ncp->nc_vp)
+               TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
+       pool_put(&nch_pool, ncp);
+       if (dvp)
+               vdrop(dvp);
+}
+
+/*
  * Look for a name in the cache. We don't do this if the segment name is
  * long, simply so the cache can avoid holding long names (which would
  * either waste space, or add greatly to the complexity).
@@ -107,7 +140,7 @@ int
 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
 {
        struct namecache *ncp;
-       struct nchashhead *ncpp;
+       struct namecache n;
        struct vnode *vp;
        struct proc *p = curproc;
        u_long vpid;
@@ -125,14 +158,11 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, st
                return (-1);
        }
 
-       ncpp = &nchashtbl[NCHASH(dvp, cnp)];
-       LIST_FOREACH(ncp, ncpp, nc_hash) {
-               if (ncp->nc_dvp == dvp &&
-                   ncp->nc_dvpid == dvp->v_id &&
-                   ncp->nc_nlen == cnp->cn_namelen &&
-                   !memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
-                       break;
-       }
+       /* lookup in directory vnode's redblack tree */
+       n.nc_nlen = cnp->cn_namelen;
+       memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
+       ncp = RB_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);
+
        if (ncp == NULL) {
                nchstats.ncs_miss++;
                return (-1);
@@ -145,13 +175,10 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, st
                    (cnp->cn_flags & ISLASTCN) == 0) {
                        nchstats.ncs_neghits++;
                        /*
-                        * Move this slot to end of LRU chain,
-                        * if not already there.
+                        * Move this slot to end of the negative LRU chain,
                         */
-                       if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
-                               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-                               TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-                       }
+                       TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
+                       TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
                        return (ENOENT);
                } else {
                        nchstats.ncs_badhits++;
@@ -220,12 +247,10 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, st
 
        nchstats.ncs_goodhits++;
        /*
-        * Move this slot to end of LRU chain, if not already there.
+        * Move this slot to end of the regular LRU chain.
         */
-       if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
-               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-               TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-       }
+       TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+       TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
        *vpp = vp;
        return (0);
 
@@ -235,16 +260,7 @@ remove:
         * the cache entry is invalid, or otherwise don't
         * want cache entry to exist.
         */
-       TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-       LIST_REMOVE(ncp, nc_hash);
-       ncp->nc_hash.le_prev = NULL;
-
-       if (ncp->nc_vhash.le_prev != NULL) {
-               LIST_REMOVE(ncp, nc_vhash);
-               ncp->nc_vhash.le_prev = NULL;
-       }
-
-       TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
+       cache_zap(ncp);
        return (-1);
 }
 
@@ -267,62 +283,51 @@ int
 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
 {
        struct namecache *ncp;
-       struct vnode *dvp;
-       struct ncvhashhead *nvcpp;
+       struct vnode *dvp = NULL;
        char *bp;
 
        if (!doingcache)
                goto out;
-
-       nvcpp = &ncvhashtbl[NCVHASH(vp)];
-
-       LIST_FOREACH(ncp, nvcpp, nc_vhash) {
-               if (ncp->nc_vp == vp &&
-                   ncp->nc_vpid == vp->v_id &&
-                   (dvp = ncp->nc_dvp) != NULL &&
-                   /* avoid pesky '.' entries.. */
-                   dvp != vp && ncp->nc_dvpid == dvp->v_id) {
-
+       TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
+               dvp = ncp->nc_dvp;
+               if (dvp && dvp != vp)
+                       goto found;
+       }
+       goto miss;
+found:
 #ifdef DIAGNOSTIC
-                       if (ncp->nc_nlen == 1 &&
-                           ncp->nc_name[0] == '.')
-                               panic("cache_revlookup: found entry for .");
-
-                       if (ncp->nc_nlen == 2 &&
-                           ncp->nc_name[0] == '.' &&
-                           ncp->nc_name[1] == '.')
-                               panic("cache_revlookup: found entry for ..");
+       if (ncp->nc_nlen == 1 &&
+           ncp->nc_name[0] == '.')
+               panic("cache_revlookup: found entry for .");
+       if (ncp->nc_nlen == 2 &&
+           ncp->nc_name[0] == '.' &&
+           ncp->nc_name[1] == '.')
+               panic("cache_revlookup: found entry for ..");
 #endif
-                       nchstats.ncs_revhits++;
+       nchstats.ncs_revhits++;
 
-                       if (bufp != NULL) {
-                               bp = *bpp;
-                               bp -= ncp->nc_nlen;
-                               if (bp <= bufp) {
-                                       *dvpp = NULL;
-                                       return (ERANGE);
-                               }
-                               memcpy(bp, ncp->nc_name, ncp->nc_nlen);
-                               *bpp = bp;
-                       }
-
-                       *dvpp = dvp;
-
-                       /*
-                        * XXX: Should we vget() here to have more
-                        * consistent semantics with cache_lookup()?
-                        *
-                        * For MP safety it might be necessary to do
-                        * this here, while also protecting hash
-                        * tables themselves to provide some sort of
-                        * sane inter locking.
-                        */
-                       return (0);
+       if (bufp != NULL) {
+               bp = *bpp;
+               bp -= ncp->nc_nlen;
+               if (bp <= bufp) {
+                       *dvpp = NULL;
+                       return (ERANGE);
                }
+               memcpy(bp, ncp->nc_name, ncp->nc_nlen);
+               *bpp = bp;
        }
-       nchstats.ncs_revmiss++;
 
- out:
+       *dvpp = dvp;
+
+       /*
+        * XXX: Should we vget() here to have more
+        * consistent semantics with cache_lookup()?
+        */
+       return (0);
+
+miss:
+       nchstats.ncs_revmiss++;
+out:
        *dvpp = NULL;
        return (-1);
 }
@@ -334,58 +339,50 @@ void
 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
        struct namecache *ncp;
-       struct nchashhead *ncpp;
-       struct ncvhashhead *nvcpp;
 
        if (!doingcache || cnp->cn_namelen > NCHNAMLEN)
                return;
 
        /*
-        * Free the cache slot at head of lru chain.
+        * allocate, or recycle (free and allocate) an ncp.
         */
-       if (numcache < desiredvnodes) {
-               ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
-               numcache++;
-       } else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) {
-               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-               if (ncp->nc_hash.le_prev != NULL) {
-                       LIST_REMOVE(ncp, nc_hash);
-                       ncp->nc_hash.le_prev = NULL;
-               }
-               if (ncp->nc_vhash.le_prev != NULL) {
-                       LIST_REMOVE(ncp, nc_vhash);
-                       ncp->nc_vhash.le_prev = NULL;
-               }
-       } else
-               return;
+       if (numcache >= desiredvnodes) {
+               if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
+                       cache_zap(ncp);
+               else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
+                       cache_zap(ncp);
+               else
+                       panic("wtf? leak?");
+       }
+       ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
+
        /* grab the vnode we just found */
        ncp->nc_vp = vp;
        if (vp)
                ncp->nc_vpid = vp->v_id;
+
        /* fill in cache info */
        ncp->nc_dvp = dvp;
        ncp->nc_dvpid = dvp->v_id;
        ncp->nc_nlen = cnp->cn_namelen;
        bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
-       TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-       ncpp = &nchashtbl[NCHASH(dvp, cnp)];
-       LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
-
-       /*
-        * Create reverse-cache entries (used in getcwd) for
-        * directories.
-        */
-
-       ncp->nc_vhash.le_prev = NULL;
-       ncp->nc_vhash.le_next = NULL;
-
-       if (vp && vp != dvp && vp->v_type == VDIR &&
-           (ncp->nc_nlen > 2 ||
-               (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
-               (ncp->nc_nlen > 0 && ncp->nc_name[0] != '.'))) {
-               nvcpp = &ncvhashtbl[NCVHASH(vp)];
-               LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
+       if (RB_EMPTY(&dvp->v_nc_tree)) {
+               vhold(dvp);
        }
+       if (RB_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp) != NULL)
+               panic("Ich habe eine Rotweinflasche in meinem Arsch\n");
+       if (vp) {
+               TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
+               numcache++;
+               TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp, nc_me);
+       } else {
+               TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
+               numneg++;
+       }
+       if (numneg  > desiredvnodes) {
+               if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
+                       cache_zap(ncp);
+       }
 }
 
 /*
@@ -394,10 +391,8 @@ cache_enter(struct vnode *dvp, struct vnode *vp, struc
 void
 nchinit()
 {
-
        TAILQ_INIT(&nclruhead);
-       nchashtbl = hashinit(desiredvnodes, M_CACHE, M_WAITOK, &nchash);
-       ncvhashtbl = hashinit(desiredvnodes/8, M_CACHE, M_WAITOK, &ncvhash);
+       TAILQ_INIT(&nclruneghead);
        pool_init(&nch_pool, sizeof(struct namecache), 0, 0, 0, "nchpl",
            &pool_allocator_nointr);
 }
@@ -410,18 +405,16 @@ void
 cache_purge(struct vnode *vp)
 {
        struct namecache *ncp;
-       struct nchashhead *ncpp;
 
+       while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
+               cache_zap(ncp);
+       while ((ncp = RB_ROOT(&vp->v_nc_tree)))
+               cache_zap(ncp);
+
+       /* XXX this blows goats */
        vp->v_id = ++nextvnodeid;
-       if (nextvnodeid != 0)
-               return;
-       for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
-               LIST_FOREACH(ncp, ncpp, nc_hash) {
-                       ncp->nc_vpid = 0;
-                       ncp->nc_dvpid = 0;
-               }
-       }
-       vp->v_id = ++nextvnodeid;
+       if (vp->v_id == 0)
+               vp->v_id = ++nextvnodeid;
 }
 
 /*
@@ -437,6 +430,7 @@ cache_purgevfs(struct mount *mp)
 {
        struct namecache *ncp, *nxtcp;
 
+       /* whack the regular entries */
        for (ncp = TAILQ_FIRST(&nclruhead); ncp != TAILQ_END(&nclruhead);
            ncp = nxtcp) {
                if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
@@ -444,19 +438,20 @@ cache_purgevfs(struct mount *mp)
                        continue;
                }
                /* free the resources we had */
-               ncp->nc_vp = NULL;
-               ncp->nc_dvp = NULL;
-               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-               if (ncp->nc_hash.le_prev != NULL) {
-                       LIST_REMOVE(ncp, nc_hash);
-                       ncp->nc_hash.le_prev = NULL;
-               }
-               if (ncp->nc_vhash.le_prev != NULL) {
-                       LIST_REMOVE(ncp, nc_vhash);
-                       ncp->nc_vhash.le_prev = NULL;
-               }
+               cache_zap(ncp);
                /* cause rescan of list, it may have altered */
                nxtcp = TAILQ_FIRST(&nclruhead);
-               TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
+       }
+       /* whack the negative entries */
+       for (ncp = TAILQ_FIRST(&nclruneghead); ncp != TAILQ_END(&nclruneghead);
+           ncp = nxtcp) {
+               if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
+                       nxtcp = TAILQ_NEXT(ncp, nc_neg);
+                       continue;
+               }
+               /* free the resources we had */
+               cache_zap(ncp);
+               /* cause rescan of list, it may have altered */
+               nxtcp = TAILQ_FIRST(&nclruneghead);
        }
 }
Index: kern/vfs_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.177
diff -u -p kern/vfs_subr.c
--- kern/vfs_subr.c     6 Jun 2009 18:06:22 -0000       1.177
+++ kern/vfs_subr.c     11 Jun 2009 21:32:21 -0000
@@ -360,6 +360,8 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (
                splx(s);
                vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
                RB_INIT(&vp->v_bufs_tree);
+               RB_INIT(&vp->v_nc_tree);
+               TAILQ_INIT(&vp->v_cache_dst);
                numvnodes++;
        } else {
                for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
Index: sys/namei.h
===================================================================
RCS file: /cvs/src/sys/sys/namei.h,v
retrieving revision 1.22
diff -u -p sys/namei.h
--- sys/namei.h 29 Aug 2008 08:57:28 -0000      1.22
+++ sys/namei.h 11 Jun 2009 21:32:21 -0000
@@ -36,7 +36,12 @@
 #define        _SYS_NAMEI_H_
 
 #include <sys/queue.h>
+#include <sys/tree.h>
 
+struct namecache;
+struct namecache_rb_cache;
+RB_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
+
 /*
  * Encapsulation of namei parameters.
  */
@@ -156,9 +161,10 @@ struct nameidata {
 #define        NCHNAMLEN       31      /* maximum name segment length we 
bother with */
 
 struct namecache {
-       LIST_ENTRY(namecache) nc_hash;  /* hash chain */
-       LIST_ENTRY(namecache) nc_vhash; /* (reverse) directory hash chain */
-       TAILQ_ENTRY(namecache) nc_lru;  /* LRU chain */
+       TAILQ_ENTRY(namecache) nc_lru;  /* Regular Entry LRU chain */
+       TAILQ_ENTRY(namecache) nc_neg;  /* Negative Entry LRU chain */
+       RB_ENTRY(namecache) n_rbcache;  /* Namecache rb tree from vnode */
+       TAILQ_ENTRY(namecache) nc_me;   /* ncp's referring to me */
        struct  vnode *nc_dvp;          /* vnode of parent of name */
        u_long  nc_dvpid;               /* capability number of nc_dvp */
        struct  vnode *nc_vp;           /* vnode the name refers to */
Index: sys/vnode.h
===================================================================
RCS file: /cvs/src/sys/sys/vnode.h,v
retrieving revision 1.99
diff -u -p sys/vnode.h
--- sys/vnode.h 3 Jun 2009 14:45:55 -0000       1.99
+++ sys/vnode.h 11 Jun 2009 21:32:21 -0000
@@ -36,6 +36,7 @@
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
+#include <sys/namei.h>
 #include <sys/selinfo.h>
 #include <sys/tree.h>
 
@@ -82,6 +83,7 @@ enum vtagtype {
 LIST_HEAD(buflists, buf);
 
 RB_HEAD(buf_rb_bufs, buf);
+RB_HEAD(namecache_rb_cache, namecache);
 
 struct vnode {
        struct uvm_vnode v_uvm;                 /* uvm data */
@@ -110,6 +112,10 @@ struct vnode {
                struct fifoinfo *vu_fifoinfo;   /* fifo (VFIFO) */
        } v_un;
 
+       /* VFS namecache */
+       struct namecache_rb_cache v_nc_tree;
+       TAILQ_HEAD(, namecache) v_cache_dst;     /* cache entries to us */
+
        enum    vtagtype v_tag;                 /* type of underlying data */
        void    *v_data;                        /* private data for fs */
        struct  selinfo v_selectinfo;           /* identity of poller(s) */
@@ -248,8 +254,9 @@ extern      int desiredvnodes;              /* XXX number 
of vnodes des
 extern int maxvnodes;                  /* XXX number of vnodes to allocate */
 extern time_t syncdelay;               /* time to delay syncing vnodes */
 extern int rushjob;                    /* # of slots syncer should run ASAP */
+extern void    vhold(struct vnode *);
+extern void    vdrop(struct vnode *);
 #endif /* _KERNEL */
-
 
 /*
  * Mods for exensibility.

Reply via email to