Ok let's try this again:

> 
>       I could use some assistance in testing this, particularly on some of 
> the more odd archetectures.
> 
>       This diff makes a bunch of changes to the vfs name cache:
> 
> 1) it gets rid of the global hash table and reverse hash table for namecahe 
> entries. namecache entries
>    are now allocated and freed in two global LRU's - one for regular, and one 
> for negative entries. 
> 
> 2) Entries are no longer searched in the global lists, instead they are kept 
> track of in the relevant
> vnodes. Since each vnode can be either the parent (directory) vnode of a 
> namecache entry, or the target of
> the entry, we keep track of it in both ways in the vnode. We now use a rb 
> tree to search the namecache from
> a directory vnode, and keep a list of which entries that we are the target 
> vnode.
> 
> 3) (most importantly) namecache entries can now be allocated and freed. 
> 
> 4) cache_purge now actually does something rather than depending on vnode 
> horror to work. when recycling a vnode
> cache_purge will now correctly clear the name cache entries associated with 
> the vnode.  (before it basically
> didn't do anything, and depended on us noticing the vnodes were being 
> recycled underneath us)
> 
>       This has been beat on a bunch, and appears not to slow anything down. I 
> do require some testing
> and reports on other arch's particularly the likes of sparc, vax, and strange 
> things if possible
> 
>       Thanks,
>       -Bob

        I had this ready to go after c2k9, but with the instability in the
tree I never committed it. I'd like some people to beat on it again please.

        This is a first step toward making vnodes something more generic and
sane where hopefully we can allocate and deallocate them without major
pain. I really need testing of this. - now updated to apply again.

        -Bob

Index: kern/vfs_cache.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_cache.c,v
retrieving revision 1.30
diff -u -r1.30 vfs_cache.c
--- kern/vfs_cache.c    9 Jul 2009 22:29:56 -0000       1.30
+++ kern/vfs_cache.c    9 Aug 2009 18:30:12 -0000
@@ -68,26 +68,59 @@
 /*
  * Structures associated with name caching.
  */
-LIST_HEAD(nchashhead, namecache) *nchashtbl;
 u_long nchash;                         /* size of hash table - 1 */
-long   numcache;                       /* number of cache entries allocated */
-TAILQ_HEAD(, namecache) nclruhead;             /* LRU chain */
-struct nchstats nchstats;              /* cache effectiveness statistics */
-
-LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
-u_long  ncvhash;                        /* size of hash table - 1 */
-
-#define NCHASH(dvp, cnp) \
-       hash32_buf(&(dvp)->v_id, sizeof((dvp)->v_id), (cnp)->cn_hash) & nchash
+long   numcache;                       /* total number of cache entries 
allocated */
+long   numneg;                         /* number of negative cache entries */
 
-#define NCVHASH(vp) (vp)->v_id & ncvhash
+TAILQ_HEAD(, namecache) nclruhead;             /* Regular Entry LRU chain */
+TAILQ_HEAD(, namecache) nclruneghead;          /* Negative Entry LRU chain */
+struct nchstats nchstats;              /* cache effectiveness statistics */
 
 int doingcache = 1;                    /* 1 => enable the cache */
 
 struct pool nch_pool;
 
+void cache_zap(struct namecache *);
 u_long nextvnodeid;
 
+static int
+namecache_compare(struct namecache *n1, struct namecache *n2)
+{
+       if (n1->nc_nlen == n2->nc_nlen)
+               return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
+       else
+               return (n1->nc_nlen - n2->nc_nlen);
+}
+
+RB_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
+
+/*
+ * blow away a namecache entry
+ */
+void
+cache_zap(struct namecache *ncp)
+{
+       struct vnode *dvp = NULL;
+
+       if (ncp->nc_vp != NULL) {
+               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+               numcache--;
+       } else {
+               TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
+               numneg--;
+       }
+       if (ncp->nc_dvp) {
+               RB_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
+               if (RB_EMPTY(&ncp->nc_dvp->v_nc_tree))
+                       dvp = ncp->nc_dvp;
+       }
+       if (ncp->nc_vp)
+               TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
+       pool_put(&nch_pool, ncp);
+       if (dvp)
+               vdrop(dvp);
+}
+
 /*
  * Look for a name in the cache. We don't do this if the segment name is
  * long, simply so the cache can avoid holding long names (which would
@@ -107,7 +140,7 @@
 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
 {
        struct namecache *ncp;
-       struct nchashhead *ncpp;
+       struct namecache n;
        struct vnode *vp;
        struct proc *p = curproc;
        u_long vpid;
@@ -125,14 +158,11 @@
                return (-1);
        }
 
-       ncpp = &nchashtbl[NCHASH(dvp, cnp)];
-       LIST_FOREACH(ncp, ncpp, nc_hash) {
-               if (ncp->nc_dvp == dvp &&
-                   ncp->nc_dvpid == dvp->v_id &&
-                   ncp->nc_nlen == cnp->cn_namelen &&
-                   !memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
-                       break;
-       }
+       /* lookup in directory vnode's redblack tree */
+       n.nc_nlen = cnp->cn_namelen;
+       memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
+       ncp = RB_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);
+
        if (ncp == NULL) {
                nchstats.ncs_miss++;
                return (-1);
@@ -145,13 +175,10 @@
                    (cnp->cn_flags & ISLASTCN) == 0) {
                        nchstats.ncs_neghits++;
                        /*
-                        * Move this slot to end of LRU chain,
-                        * if not already there.
+                        * Move this slot to end of the negative LRU chain,
                         */
-                       if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
-                               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-                               TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-                       }
+                       TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
+                       TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
                        return (ENOENT);
                } else {
                        nchstats.ncs_badhits++;
@@ -220,12 +247,10 @@
 
        nchstats.ncs_goodhits++;
        /*
-        * Move this slot to end of LRU chain, if not already there.
+        * Move this slot to end of the regular LRU chain.
         */
-       if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
-               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-               TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-       }
+       TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+       TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
        *vpp = vp;
        return (0);
 
@@ -235,16 +260,7 @@
         * the cache entry is invalid, or otherwise don't
         * want cache entry to exist.
         */
-       TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-       LIST_REMOVE(ncp, nc_hash);
-       ncp->nc_hash.le_prev = NULL;
-
-       if (ncp->nc_vhash.le_prev != NULL) {
-               LIST_REMOVE(ncp, nc_vhash);
-               ncp->nc_vhash.le_prev = NULL;
-       }
-
-       TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
+       cache_zap(ncp);
        return (-1);
 }
 
@@ -267,62 +283,51 @@
 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
 {
        struct namecache *ncp;
-       struct vnode *dvp;
-       struct ncvhashhead *nvcpp;
+       struct vnode *dvp = NULL;
        char *bp;
 
        if (!doingcache)
                goto out;
-
-       nvcpp = &ncvhashtbl[NCVHASH(vp)];
-
-       LIST_FOREACH(ncp, nvcpp, nc_vhash) {
-               if (ncp->nc_vp == vp &&
-                   ncp->nc_vpid == vp->v_id &&
-                   (dvp = ncp->nc_dvp) != NULL &&
-                   /* avoid pesky '.' entries.. */
-                   dvp != vp && ncp->nc_dvpid == dvp->v_id) {
-
+       TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
+               dvp = ncp->nc_dvp;
+               if (dvp && dvp != vp)
+                       goto found;
+       }
+       goto miss;
+found:
 #ifdef DIAGNOSTIC
-                       if (ncp->nc_nlen == 1 &&
-                           ncp->nc_name[0] == '.')
-                               panic("cache_revlookup: found entry for .");
-
-                       if (ncp->nc_nlen == 2 &&
-                           ncp->nc_name[0] == '.' &&
-                           ncp->nc_name[1] == '.')
-                               panic("cache_revlookup: found entry for ..");
+       if (ncp->nc_nlen == 1 &&
+           ncp->nc_name[0] == '.')
+               panic("cache_revlookup: found entry for .");
+       if (ncp->nc_nlen == 2 &&
+           ncp->nc_name[0] == '.' &&
+           ncp->nc_name[1] == '.')
+               panic("cache_revlookup: found entry for ..");
 #endif
-                       nchstats.ncs_revhits++;
-
-                       if (bufp != NULL) {
-                               bp = *bpp;
-                               bp -= ncp->nc_nlen;
-                               if (bp <= bufp) {
-                                       *dvpp = NULL;
-                                       return (ERANGE);
-                               }
-                               memcpy(bp, ncp->nc_name, ncp->nc_nlen);
-                               *bpp = bp;
-                       }
+       nchstats.ncs_revhits++;
 
-                       *dvpp = dvp;
-
-                       /*
-                        * XXX: Should we vget() here to have more
-                        * consistent semantics with cache_lookup()?
-                        *
-                        * For MP safety it might be necessary to do
-                        * this here, while also protecting hash
-                        * tables themselves to provide some sort of
-                        * sane inter locking.
-                        */
-                       return (0);
+       if (bufp != NULL) {
+               bp = *bpp;
+               bp -= ncp->nc_nlen;
+               if (bp <= bufp) {
+                       *dvpp = NULL;
+                       return (ERANGE);
                }
+               memcpy(bp, ncp->nc_name, ncp->nc_nlen);
+               *bpp = bp;
        }
-       nchstats.ncs_revmiss++;
 
- out:
+       *dvpp = dvp;
+
+       /*
+        * XXX: Should we vget() here to have more
+        * consistent semantics with cache_lookup()?
+        */
+       return (0);
+
+miss:
+       nchstats.ncs_revmiss++;
+out:
        *dvpp = NULL;
        return (-1);
 }
@@ -334,57 +339,49 @@
 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
        struct namecache *ncp;
-       struct nchashhead *ncpp;
-       struct ncvhashhead *nvcpp;
 
        if (!doingcache || cnp->cn_namelen > NCHNAMLEN)
                return;
 
        /*
-        * Free the cache slot at head of lru chain.
+        * allocate, or recycle (free and allocate) an ncp.
         */
-       if (numcache < desiredvnodes) {
-               ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
-               numcache++;
-       } else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) {
-               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-               if (ncp->nc_hash.le_prev != NULL) {
-                       LIST_REMOVE(ncp, nc_hash);
-                       ncp->nc_hash.le_prev = NULL;
-               }
-               if (ncp->nc_vhash.le_prev != NULL) {
-                       LIST_REMOVE(ncp, nc_vhash);
-                       ncp->nc_vhash.le_prev = NULL;
-               }
-       } else
-               return;
+       if (numcache >= desiredvnodes) {
+               if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
+                       cache_zap(ncp);
+               else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
+                       cache_zap(ncp);
+               else
+                       panic("wtf? leak?");
+       }
+       ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
+
        /* grab the vnode we just found */
        ncp->nc_vp = vp;
        if (vp)
                ncp->nc_vpid = vp->v_id;
+
        /* fill in cache info */
        ncp->nc_dvp = dvp;
        ncp->nc_dvpid = dvp->v_id;
        ncp->nc_nlen = cnp->cn_namelen;
        bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
-       TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-       ncpp = &nchashtbl[NCHASH(dvp, cnp)];
-       LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
-
-       /*
-        * Create reverse-cache entries (used in getcwd) for
-        * directories.
-        */
-
-       ncp->nc_vhash.le_prev = NULL;
-       ncp->nc_vhash.le_next = NULL;
-
-       if (vp && vp != dvp && vp->v_type == VDIR &&
-           (ncp->nc_nlen > 2 ||
-               (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
-               (ncp->nc_nlen > 0 && ncp->nc_name[0] != '.'))) {
-               nvcpp = &ncvhashtbl[NCVHASH(vp)];
-               LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
+       if (RB_EMPTY(&dvp->v_nc_tree)) {
+               vhold(dvp);
+       }
+       if (RB_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp) != NULL)
+               panic("Ich habe eine Rotweinflasche in meinem Arsch\n");
+       if (vp) {
+               TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
+               numcache++;
+               TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp, nc_me);
+       } else {
+               TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
+               numneg++;
+       }
+       if (numneg  > desiredvnodes) {
+               if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
+                       cache_zap(ncp);
        }
 }
 
@@ -394,10 +391,8 @@
 void
 nchinit()
 {
-
        TAILQ_INIT(&nclruhead);
-       nchashtbl = hashinit(desiredvnodes, M_CACHE, M_WAITOK, &nchash);
-       ncvhashtbl = hashinit(desiredvnodes/8, M_CACHE, M_WAITOK, &ncvhash);
+       TAILQ_INIT(&nclruneghead);
        pool_init(&nch_pool, sizeof(struct namecache), 0, 0, 0, "nchpl",
            &pool_allocator_nointr);
 }
@@ -410,18 +405,16 @@
 cache_purge(struct vnode *vp)
 {
        struct namecache *ncp;
-       struct nchashhead *ncpp;
 
+       while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
+               cache_zap(ncp);
+       while ((ncp = RB_ROOT(&vp->v_nc_tree)))
+               cache_zap(ncp);
+
+       /* XXX this blows goats */
        vp->v_id = ++nextvnodeid;
-       if (nextvnodeid != 0)
-               return;
-       for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
-               LIST_FOREACH(ncp, ncpp, nc_hash) {
-                       ncp->nc_vpid = 0;
-                       ncp->nc_dvpid = 0;
-               }
-       }
-       vp->v_id = ++nextvnodeid;
+       if (vp->v_id == 0)
+               vp->v_id = ++nextvnodeid;
 }
 
 /*
@@ -437,6 +430,7 @@
 {
        struct namecache *ncp, *nxtcp;
 
+       /* whack the regular entries */
        for (ncp = TAILQ_FIRST(&nclruhead); ncp != TAILQ_END(&nclruhead);
            ncp = nxtcp) {
                if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
@@ -444,19 +438,20 @@
                        continue;
                }
                /* free the resources we had */
-               ncp->nc_vp = NULL;
-               ncp->nc_dvp = NULL;
-               TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-               if (ncp->nc_hash.le_prev != NULL) {
-                       LIST_REMOVE(ncp, nc_hash);
-                       ncp->nc_hash.le_prev = NULL;
-               }
-               if (ncp->nc_vhash.le_prev != NULL) {
-                       LIST_REMOVE(ncp, nc_vhash);
-                       ncp->nc_vhash.le_prev = NULL;
-               }
+               cache_zap(ncp);
                /* cause rescan of list, it may have altered */
                nxtcp = TAILQ_FIRST(&nclruhead);
-               TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
+       }
+       /* whack the negative entries */
+       for (ncp = TAILQ_FIRST(&nclruneghead); ncp != TAILQ_END(&nclruneghead);
+           ncp = nxtcp) {
+               if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
+                       nxtcp = TAILQ_NEXT(ncp, nc_neg);
+                       continue;
+               }
+               /* free the resources we had */
+               cache_zap(ncp);
+               /* cause rescan of list, it may have altered */
+               nxtcp = TAILQ_FIRST(&nclruneghead);
        }
 }
Index: kern/vfs_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.180
diff -u -r1.180 vfs_subr.c
--- kern/vfs_subr.c     2 Aug 2009 16:28:40 -0000       1.180
+++ kern/vfs_subr.c     9 Aug 2009 18:30:12 -0000
@@ -360,6 +360,8 @@
                splx(s);
                vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
                RB_INIT(&vp->v_bufs_tree);
+               RB_INIT(&vp->v_nc_tree);
+               TAILQ_INIT(&vp->v_cache_dst);
                numvnodes++;
        } else {
                for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
Index: sys/namei.h
===================================================================
RCS file: /cvs/src/sys/sys/namei.h,v
retrieving revision 1.22
diff -u -r1.22 namei.h
--- sys/namei.h 29 Aug 2008 08:57:28 -0000      1.22
+++ sys/namei.h 9 Aug 2009 18:30:12 -0000
@@ -36,6 +36,11 @@
 #define        _SYS_NAMEI_H_
 
 #include <sys/queue.h>
+#include <sys/tree.h>
+
+struct namecache;
+struct namecache_rb_cache;
+RB_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
 
 /*
  * Encapsulation of namei parameters.
@@ -156,9 +161,10 @@
 #define        NCHNAMLEN       31      /* maximum name segment length we 
bother with */
 
 struct namecache {
-       LIST_ENTRY(namecache) nc_hash;  /* hash chain */
-       LIST_ENTRY(namecache) nc_vhash; /* (reverse) directory hash chain */
-       TAILQ_ENTRY(namecache) nc_lru;  /* LRU chain */
+       TAILQ_ENTRY(namecache) nc_lru;  /* Regular Entry LRU chain */
+       TAILQ_ENTRY(namecache) nc_neg;  /* Negative Entry LRU chain */
+       RB_ENTRY(namecache) n_rbcache;  /* Namecache rb tree from vnode */
+       TAILQ_ENTRY(namecache) nc_me;   /* ncp's referring to me */
        struct  vnode *nc_dvp;          /* vnode of parent of name */
        u_long  nc_dvpid;               /* capability number of nc_dvp */
        struct  vnode *nc_vp;           /* vnode the name refers to */
Index: sys/vnode.h
===================================================================
RCS file: /cvs/src/sys/sys/vnode.h,v
retrieving revision 1.102
diff -u -r1.102 vnode.h
--- sys/vnode.h 2 Aug 2009 16:28:40 -0000       1.102
+++ sys/vnode.h 9 Aug 2009 18:30:12 -0000
@@ -36,6 +36,7 @@
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
+#include <sys/namei.h>
 #include <sys/selinfo.h>
 #include <sys/tree.h>
 
@@ -82,6 +83,7 @@
 LIST_HEAD(buflists, buf);
 
 RB_HEAD(buf_rb_bufs, buf);
+RB_HEAD(namecache_rb_cache, namecache);
 
 struct vnode {
        struct uvm_vnode v_uvm;                 /* uvm data */
@@ -110,6 +112,10 @@
                struct fifoinfo *vu_fifoinfo;   /* fifo (VFIFO) */
        } v_un;
 
+       /* VFS namecache */
+       struct namecache_rb_cache v_nc_tree;
+       TAILQ_HEAD(, namecache) v_cache_dst;     /* cache entries to us */
+
        enum    vtagtype v_tag;                 /* type of underlying data */
        void    *v_data;                        /* private data for fs */
        struct  selinfo v_selectinfo;           /* identity of poller(s) */
@@ -247,8 +253,9 @@
 extern int maxvnodes;                  /* XXX number of vnodes to allocate */
 extern time_t syncdelay;               /* time to delay syncing vnodes */
 extern int rushjob;                    /* # of slots syncer should run ASAP */
+extern void    vhold(struct vnode *);
+extern void    vdrop(struct vnode *);
 #endif /* _KERNEL */
-
 
 /*
  * Mods for exensibility.

Reply via email to