Author: kib
Date: Sun Apr 24 11:01:42 2011
New Revision: 220986
URL: http://svn.freebsd.org/changeset/base/220986

Log:
  Merge the part of r207141 that fixes the locking for ufs_rename() (and
  r218838 followup).
  
  Adopt the SU calls to the stable/8 SU implementation, with the help from
  Kirk.
  
  PR:   kern/156545
  Reviewed by:  mckusick
  Tested by:    pho

Modified:
  stable/8/sys/ufs/ufs/inode.h
  stable/8/sys/ufs/ufs/ufs_extern.h
  stable/8/sys/ufs/ufs/ufs_lookup.c
  stable/8/sys/ufs/ufs/ufs_vnops.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)

Modified: stable/8/sys/ufs/ufs/inode.h
==============================================================================
--- stable/8/sys/ufs/ufs/inode.h        Sun Apr 24 10:47:56 2011        
(r220985)
+++ stable/8/sys/ufs/ufs/inode.h        Sun Apr 24 11:01:42 2011        
(r220986)
@@ -120,7 +120,7 @@ struct inode {
 #define        IN_CHANGE       0x0002          /* Inode change time update 
request. */
 #define        IN_UPDATE       0x0004          /* Modification time update 
request. */
 #define        IN_MODIFIED     0x0008          /* Inode has been modified. */
-#define        IN_RENAME       0x0010          /* Inode is being renamed. */
+#define        IN_NEEDSYNC     0x0010          /* Inode requires fsync. */
 #define        IN_LAZYMOD      0x0040          /* Modified, but don't write 
yet. */
 #define        IN_SPACECOUNTED 0x0080          /* Blocks to be freed in free 
count. */
 #define        IN_LAZYACCESS   0x0100          /* Process IN_ACCESS after the

Modified: stable/8/sys/ufs/ufs/ufs_extern.h
==============================================================================
--- stable/8/sys/ufs/ufs/ufs_extern.h   Sun Apr 24 10:47:56 2011        
(r220985)
+++ stable/8/sys/ufs/ufs/ufs_extern.h   Sun Apr 24 11:01:42 2011        
(r220986)
@@ -57,7 +57,7 @@ int    ufs_bmap(struct vop_bmap_args *);
 int     ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *,
            struct buf *, int *, int *);
 int     ufs_fhtovp(struct mount *, struct ufid *, struct vnode **);
-int     ufs_checkpath(ino_t, struct inode *, struct ucred *);
+int     ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *);
 void    ufs_dirbad(struct inode *, doff_t, char *);
 int     ufs_dirbadentry(struct vnode *, struct direct *, int);
 int     ufs_dirempty(struct inode *, ino_t, struct ucred *);
@@ -66,9 +66,11 @@ int   ufs_extwrite(struct vop_write_args 
 void    ufs_makedirentry(struct inode *, struct componentname *,
            struct direct *);
 int     ufs_direnter(struct vnode *, struct vnode *, struct direct *,
-           struct componentname *, struct buf *);
+           struct componentname *, struct buf *, int);
 int     ufs_dirremove(struct vnode *, struct inode *, int, int);
 int     ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int);
+int     ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *,
+           ino_t *);
 int     ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *);
 int     ufs_inactive(struct vop_inactive_args *);
 int     ufs_init(struct vfsconf *);

Modified: stable/8/sys/ufs/ufs/ufs_lookup.c
==============================================================================
--- stable/8/sys/ufs/ufs/ufs_lookup.c   Sun Apr 24 10:47:56 2011        
(r220985)
+++ stable/8/sys/ufs/ufs/ufs_lookup.c   Sun Apr 24 11:01:42 2011        
(r220986)
@@ -76,9 +76,6 @@ SYSCTL_INT(_debug, OID_AUTO, dircheck, C
 /* true if old FS format...*/
 #define OFSFMT(vp)     ((vp)->v_mount->mnt_maxsymlinklen <= 0)
 
-static int ufs_lookup_(struct vnode *, struct vnode **, struct componentname *,
-    ino_t *);
-
 #ifdef QUOTA
 static int
 ufs_lookup_upgrade_lock(struct vnode *vp)
@@ -214,11 +211,11 @@ ufs_lookup(ap)
        } */ *ap;
 {
 
-       return (ufs_lookup_(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
+       return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
 }
 
-static int
-ufs_lookup_(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
+int
+ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname 
*cnp,
     ino_t *dd_ino)
 {
        struct inode *dp;               /* inode for directory being searched */
@@ -556,6 +553,8 @@ notfound:
        return (ENOENT);
 
 found:
+       if (dd_ino != NULL)
+               *dd_ino = ino;
        if (numdirpasses == 2)
                nchstats.ncs_pass2++;
        /*
@@ -578,11 +577,6 @@ found:
        if ((flags & ISLASTCN) && nameiop == LOOKUP)
                dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1);
 
-       if (dd_ino != NULL) {
-               *dd_ino = ino;
-               return (0);
-       }
-
        /*
         * If deleting, and at end of pathname, return
         * parameters which can be used to remove file.
@@ -590,17 +584,6 @@ found:
        if (nameiop == DELETE && (flags & ISLASTCN)) {
                if (flags & LOCKPARENT)
                        ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
-               if ((error = VFS_VGET(vdp->v_mount, ino,
-                   LK_EXCLUSIVE, &tdp)) != 0)
-                       return (error);
-
-               error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread);
-               if (error) {
-                       vput(tdp);
-                       return (error);
-               }
-
-
                /*
                 * Return pointer to current entry in dp->i_offset,
                 * and distance past previous entry (if there
@@ -617,6 +600,16 @@ found:
                        dp->i_count = 0;
                else
                        dp->i_count = dp->i_offset - prevoff;
+               if (dd_ino != NULL)
+                       return (0);
+               if ((error = VFS_VGET(vdp->v_mount, ino,
+                   LK_EXCLUSIVE, &tdp)) != 0)
+                       return (error);
+               error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread);
+               if (error) {
+                       vput(tdp);
+                       return (error);
+               }
                if (dp->i_number == ino) {
                        VREF(vdp);
                        *vpp = vdp;
@@ -648,6 +641,8 @@ found:
                dp->i_offset = i_offset;
                if (dp->i_number == ino)
                        return (EISDIR);
+               if (dd_ino != NULL)
+                       return (0);
                if ((error = VFS_VGET(vdp->v_mount, ino,
                    LK_EXCLUSIVE, &tdp)) != 0)
                        return (error);
@@ -682,6 +677,8 @@ found:
                cnp->cn_flags |= SAVENAME;
                return (0);
        }
+       if (dd_ino != NULL)
+               return (0);
 
        /*
         * Step through the translation in the name.  We do not `vput' the
@@ -713,7 +710,7 @@ found:
                 * to the inode we looked up before vdp lock was
                 * dropped.
                 */
-               error = ufs_lookup_(pdp, NULL, cnp, &ino1);
+               error = ufs_lookup_ino(pdp, NULL, cnp, &ino1);
                if (error) {
                        vput(tdp);
                        return (error);
@@ -865,12 +862,13 @@ ufs_makedirentry(ip, cnp, newdirp)
  * soft dependency code).
  */
 int
-ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
+ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename)
        struct vnode *dvp;
        struct vnode *tvp;
        struct direct *dirp;
        struct componentname *cnp;
        struct buf *newdirbp;
+       int isrename;
 {
        struct ucred *cr;
        struct thread *td;
@@ -943,22 +941,30 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
                                blkoff += DIRBLKSIZ;
                        }
                        if (softdep_setup_directory_add(bp, dp, dp->i_offset,
-                           dirp->d_ino, newdirbp, 1) == 0) {
-                               bdwrite(bp);
+                           dirp->d_ino, newdirbp, 1))
+                               dp->i_flag |= IN_NEEDSYNC;
+#ifdef JOURNALED_SOFTUPDATES
+                       if (newdirbp)
+                               bdwrite(newdirbp);
+#endif
+                       bdwrite(bp);
+                       if ((dp->i_flag & IN_NEEDSYNC) == 0)
                                return (UFS_UPDATE(dvp, 0));
-                       }
-                       /* We have just allocated a directory block in an
-                        * indirect block. Rather than tracking when it gets
-                        * claimed by the inode, we simply do a VOP_FSYNC
-                        * now to ensure that it is there (in case the user
-                        * does a future fsync). Note that we have to unlock
-                        * the inode for the entry that we just entered, as
-                        * the VOP_FSYNC may need to lock other inodes which
-                        * can lead to deadlock if we also hold a lock on
-                        * the newly entered node.
+                       /*
+                        * We have just allocated a directory block in an
+                        * indirect block.  We must prevent holes in the
+                        * directory created if directory entries are
+                        * written out of order.  To accomplish this we
+                        * fsync when we extend a directory into indirects.
+                        * During rename it's not safe to drop the tvp lock
+                        * so sync must be delayed until it is.
+                        *
+                        * This synchronous step could be removed if fsck and
+                        * the kernel were taught to fill in sparse
+                        * directories rather than panic.
                         */
-                       if ((error = bwrite(bp)))
-                               return (error);
+                       if (isrename)
+                               return (0);
                        if (tvp != NULL)
                                VOP_UNLOCK(tvp, 0);
                        error = VOP_FSYNC(dvp, MNT_WAIT, td);
@@ -1099,6 +1105,10 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
                (void) softdep_setup_directory_add(bp, dp,
                    dp->i_offset + (caddr_t)ep - dirbuf,
                    dirp->d_ino, newdirbp, 0);
+#ifdef JOURNALED_SOFTUPDATES
+               if (newdirbp != NULL)
+                       bdwrite(newdirbp);
+#endif
                bdwrite(bp);
        } else {
                if (DOINGASYNC(dvp)) {
@@ -1116,7 +1126,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
         * lock other inodes which can lead to deadlock if we also hold a
         * lock on the newly entered node.
         */
-       if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) {
+       if (isrename == 0 && error == 0 &&
+           dp->i_endoff && dp->i_endoff < dp->i_size) {
                if (tvp != NULL)
                        VOP_UNLOCK(tvp, 0);
 #ifdef UFS_DIRHASH
@@ -1386,25 +1397,25 @@ ufs_dir_dd_ino(struct vnode *vp, struct 
 
 /*
  * Check if source directory is in the path of the target directory.
- * Target is supplied locked, source is unlocked.
- * The target is always vput before returning.
  */
 int
-ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred)
+ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct 
ucred *cred, ino_t *wait_ino)
 {
-       struct vnode *vp, *vp1;
+       struct mount *mp;
+       struct vnode *tvp, *vp, *vp1;
        int error;
        ino_t dd_ino;
 
-       vp = ITOV(target);
-       if (target->i_number == source_ino) {
-               error = EEXIST;
-               goto out;
-       }
-       error = 0;
+       vp = tvp = ITOV(target);
+       mp = vp->v_mount;
+       *wait_ino = 0;
+       if (target->i_number == source_ino)
+               return (EEXIST);
+       if (target->i_number == parent_ino)
+               return (0);
        if (target->i_number == ROOTINO)
-               goto out;
-
+               return (0);
+       error = 0;
        for (;;) {
                error = ufs_dir_dd_ino(vp, cred, &dd_ino);
                if (error != 0)
@@ -1415,9 +1426,13 @@ ufs_checkpath(ino_t source_ino, struct i
                }
                if (dd_ino == ROOTINO)
                        break;
-               error = vn_vget_ino(vp, dd_ino, LK_EXCLUSIVE, &vp1);
-               if (error != 0)
+               if (dd_ino == parent_ino)
+                       break;
+               error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1);
+               if (error != 0) {
+                       *wait_ino = dd_ino;
                        break;
+               }
                /* Recheck that ".." still points to vp1 after relock of vp */
                error = ufs_dir_dd_ino(vp, cred, &dd_ino);
                if (error != 0) {
@@ -1429,14 +1444,14 @@ ufs_checkpath(ino_t source_ino, struct i
                        vput(vp1);
                        continue;
                }
-               vput(vp);
+               if (vp != tvp)
+                       vput(vp);
                vp = vp1;
        }
 
-out:
        if (error == ENOTDIR)
-               printf("checkpath: .. not a directory\n");
-       if (vp != NULL)
+               panic("checkpath: .. not a directory\n");
+       if (vp != tvp)
                vput(vp);
        return (error);
 }

Modified: stable/8/sys/ufs/ufs/ufs_vnops.c
==============================================================================
--- stable/8/sys/ufs/ufs/ufs_vnops.c    Sun Apr 24 10:47:56 2011        
(r220985)
+++ stable/8/sys/ufs/ufs/ufs_vnops.c    Sun Apr 24 11:01:42 2011        
(r220986)
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/stat.h>
+#include <sys/sysctl.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/mount.h>
@@ -114,6 +115,8 @@ static vop_close_t  ufsfifo_close;
 static vop_kqfilter_t  ufsfifo_kqfilter;
 static vop_pathconf_t  ufsfifo_pathconf;
 
+SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
+
 /*
  * A virgin directory (no blushing please).
  */
@@ -992,7 +995,7 @@ ufs_link(ap)
        error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
        if (!error) {
                ufs_makedirentry(ip, cnp, &newdir);
-               error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
+               error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
        }
 
        if (error) {
@@ -1043,7 +1046,7 @@ ufs_whiteout(ap)
                newdir.d_namlen = cnp->cn_namelen;
                bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen 
+ 1);
                newdir.d_type = DT_WHT;
-               error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
+               error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
                break;
 
        case DELETE:
@@ -1062,6 +1065,11 @@ ufs_whiteout(ap)
        return (error);
 }
 
+static volatile int rename_restarts;
+SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
+    __DEVOLATILE(int *, &rename_restarts), 0,
+    "Times rename had to restart due to lock contention");
+
 /*
  * Rename system call.
  *     rename("foo", "bar");
@@ -1101,111 +1109,185 @@ ufs_rename(ap)
        struct vnode *tdvp = ap->a_tdvp;
        struct vnode *fvp = ap->a_fvp;
        struct vnode *fdvp = ap->a_fdvp;
+       struct vnode *nvp;
        struct componentname *tcnp = ap->a_tcnp;
        struct componentname *fcnp = ap->a_fcnp;
        struct thread *td = fcnp->cn_thread;
-       struct inode *ip, *xp, *dp;
+       struct inode *fip, *tip, *tdp, *fdp;
        struct direct newdir;
-       int doingdirectory = 0, oldparent = 0, newparent = 0;
+       off_t endoff;
+       int doingdirectory, newparent;
        int error = 0, ioflag;
-       ino_t fvp_ino;
+       struct mount *mp;
+       ino_t ino;
 
 #ifdef INVARIANTS
        if ((tcnp->cn_flags & HASBUF) == 0 ||
            (fcnp->cn_flags & HASBUF) == 0)
                panic("ufs_rename: no name");
 #endif
+       endoff = 0;
+       mp = tdvp->v_mount;
+       VOP_UNLOCK(tdvp, 0);
+       if (tvp && tvp != tdvp)
+               VOP_UNLOCK(tvp, 0);
        /*
         * Check for cross-device rename.
         */
        if ((fvp->v_mount != tdvp->v_mount) ||
            (tvp && (fvp->v_mount != tvp->v_mount))) {
                error = EXDEV;
-abortit:
-               if (tdvp == tvp)
-                       vrele(tdvp);
-               else
-                       vput(tdvp);
-               if (tvp)
-                       vput(tvp);
-               vrele(fdvp);
+               mp = NULL;
+               goto releout;
+       }
+       error = vfs_busy(mp, 0);
+       if (error) {
+               mp = NULL;
+               goto releout;
+       }
+relock:
+       /* 
+        * We need to acquire 2 to 4 locks depending on whether tvp is NULL
+        * and fdvp and tdvp are the same directory.  Subsequently we need
+        * to double-check all paths and in the directory rename case we
+        * need to verify that we are not creating a directory loop.  To
+        * handle this we acquire all but fdvp using non-blocking
+        * acquisitions.  If we fail to acquire any lock in the path we will
+        * drop all held locks, acquire the new lock in a blocking fashion,
+        * and then release it and restart the rename.  This acquire/release
+        * step ensures that we do not spin on a lock waiting for release.
+        */
+       error = vn_lock(fdvp, LK_EXCLUSIVE);
+       if (error)
+               goto releout;
+       if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
+               VOP_UNLOCK(fdvp, 0);
+               error = vn_lock(tdvp, LK_EXCLUSIVE);
+               if (error)
+                       goto releout;
+               VOP_UNLOCK(tdvp, 0);
+               atomic_add_int(&rename_restarts, 1);
+               goto relock;
+       }
+       /*
+        * Re-resolve fvp to be certain it still exists and fetch the
+        * correct vnode.
+        */
+       error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
+       if (error) {
+               VOP_UNLOCK(fdvp, 0);
+               VOP_UNLOCK(tdvp, 0);
+               goto releout;
+       }
+       error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
+       if (error) {
+               VOP_UNLOCK(fdvp, 0);
+               VOP_UNLOCK(tdvp, 0);
+               if (error != EBUSY)
+                       goto releout;
+               error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
+               if (error != 0)
+                       goto releout;
+               VOP_UNLOCK(nvp, 0);
                vrele(fvp);
-               return (error);
+               fvp = nvp;
+               atomic_add_int(&rename_restarts, 1);
+               goto relock;
+       }
+       vrele(fvp);
+       fvp = nvp;
+       /*
+        * Re-resolve tvp and acquire the vnode lock if present.
+        */
+       error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
+       if (error != 0 && error != EJUSTRETURN) {
+               VOP_UNLOCK(fdvp, 0);
+               VOP_UNLOCK(tdvp, 0);
+               VOP_UNLOCK(fvp, 0);
+               goto releout;
        }
-
+       /*
+        * If tvp disappeared we just carry on.
+        */
+       if (error == EJUSTRETURN && tvp != NULL) {
+               vrele(tvp);
+               tvp = NULL;
+       }
+       /*
+        * Get the tvp ino if the lookup succeeded.  We may have to restart
+        * if the non-blocking acquire fails.
+        */
+       if (error == 0) {
+               nvp = NULL;
+               error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
+               if (tvp)
+                       vrele(tvp);
+               tvp = nvp;
+               if (error) {
+                       VOP_UNLOCK(fdvp, 0);
+                       VOP_UNLOCK(tdvp, 0);
+                       VOP_UNLOCK(fvp, 0);
+                       if (error != EBUSY)
+                               goto releout;
+                       error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
+                       if (error != 0)
+                               goto releout;
+                       VOP_UNLOCK(nvp, 0);
+                       atomic_add_int(&rename_restarts, 1);
+                       goto relock;
+               }
+       }
+       fdp = VTOI(fdvp);
+       fip = VTOI(fvp);
+       tdp = VTOI(tdvp);
+       tip = NULL;
+       if (tvp)
+               tip = VTOI(tvp);
        if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
            (VTOI(tdvp)->i_flags & APPEND))) {
                error = EPERM;
-               goto abortit;
+               goto unlockout;
        }
-
        /*
         * Renaming a file to itself has no effect.  The upper layers should
-        * not call us in that case.  Temporarily just warn if they do.
+        * not call us in that case.  However, things could change after
+        * we drop the locks above.
         */
        if (fvp == tvp) {
-               printf("ufs_rename: fvp == tvp (can't happen)\n");
                error = 0;
-               goto abortit;
+               goto unlockout;
        }
-
-       if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
-               goto abortit;
-       dp = VTOI(fdvp);
-       ip = VTOI(fvp);
-       if (ip->i_nlink >= LINK_MAX) {
-               VOP_UNLOCK(fvp, 0);
+       doingdirectory = 0;
+       newparent = 0;
+       ino = fip->i_number;
+       if (fip->i_nlink >= LINK_MAX) {
                error = EMLINK;
-               goto abortit;
+               goto unlockout;
        }
-       if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
-           || (dp->i_flags & APPEND)) {
-               VOP_UNLOCK(fvp, 0);
+       if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
+           || (fdp->i_flags & APPEND)) {
                error = EPERM;
-               goto abortit;
+               goto unlockout;
        }
-       if ((ip->i_mode & IFMT) == IFDIR) {
+       if ((fip->i_mode & IFMT) == IFDIR) {
                /*
                 * Avoid ".", "..", and aliases of "." for obvious reasons.
                 */
                if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
-                   dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
-                   (ip->i_flag & IN_RENAME)) {
-                       VOP_UNLOCK(fvp, 0);
+                   fdp == fip ||
+                   (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
                        error = EINVAL;
-                       goto abortit;
+                       goto unlockout;
                }
-               ip->i_flag |= IN_RENAME;
-               oldparent = dp->i_number;
+               if (fdp->i_number != tdp->i_number)
+                       newparent = tdp->i_number;
                doingdirectory = 1;
        }
-       vrele(fdvp);
-
-       /*
-        * When the target exists, both the directory
-        * and target vnodes are returned locked.
-        */
-       dp = VTOI(tdvp);
-       xp = NULL;
-       if (tvp)
-               xp = VTOI(tvp);
-
-       /*
-        * 1) Bump link count while we're moving stuff
-        *    around.  If we crash somewhere before
-        *    completing our work, the link count
-        *    may be wrong, but correctable.
-        */
-       ip->i_effnlink++;
-       ip->i_nlink++;
-       DIP_SET(ip, i_nlink, ip->i_nlink);
-       ip->i_flag |= IN_CHANGE;
-       if (DOINGSOFTDEP(fvp))
-               softdep_change_linkcnt(ip);
-       if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
-                                      DOINGASYNC(fvp)))) != 0) {
-               VOP_UNLOCK(fvp, 0);
-               goto bad;
+       if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) ||
+           (tvp != NULL && tvp->v_type == VDIR &&
+           tvp->v_mountedhere != NULL)) {
+               error = EXDEV;
+               goto unlockout;
        }
 
        /*
@@ -1214,35 +1296,55 @@ abortit:
         * directory hierarchy above the target, as this would
         * orphan everything below the source directory. Also
         * the user must have write permission in the source so
-        * as to be able to change "..". We must repeat the call
-        * to namei, as the parent directory is unlocked by the
-        * call to checkpath().
-        */
-       error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
-       fvp_ino = ip->i_number;
-       VOP_UNLOCK(fvp, 0);
-       if (oldparent != dp->i_number)
-               newparent = dp->i_number;
+        * as to be able to change "..".
+        */
        if (doingdirectory && newparent) {
-               if (error)      /* write access check above */
-                       goto bad;
-               if (xp != NULL)
-                       vput(tvp);
-               error = ufs_checkpath(fvp_ino, dp, tcnp->cn_cred);
+               error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
                if (error)
-                       goto out;
+                       goto unlockout;
+               error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
+                   &ino);
+               /*
+                * We encountered a lock that we have to wait for.  Unlock
+                * everything else and VGET before restarting.
+                */
+               if (ino) {
+                       VOP_UNLOCK(fdvp, 0);
+                       VOP_UNLOCK(fvp, 0);
+                       VOP_UNLOCK(tdvp, 0);
+                       if (tvp)
+                               VOP_UNLOCK(tvp, 0);
+                       error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
+                       if (error == 0)
+                               vput(nvp);
+                       atomic_add_int(&rename_restarts, 1);
+                       goto relock;
+               }
+               if (error)
+                       goto unlockout;
                if ((tcnp->cn_flags & SAVESTART) == 0)
                        panic("ufs_rename: lost to startdir");
-               VREF(tdvp);
-               error = relookup(tdvp, &tvp, tcnp);
-               if (error)
-                       goto out;
-               vrele(tdvp);
-               dp = VTOI(tdvp);
-               xp = NULL;
-               if (tvp)
-                       xp = VTOI(tvp);
        }
+       if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
+           tdp->i_effnlink == 0)
+               panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
+
+       /*
+        * 1) Bump link count while we're moving stuff
+        *    around.  If we crash somewhere before
+        *    completing our work, the link count
+        *    may be wrong, but correctable.
+        */
+       fip->i_effnlink++;
+       fip->i_nlink++;
+       DIP_SET(fip, i_nlink, fip->i_nlink);
+       fip->i_flag |= IN_CHANGE;
+       if (DOINGSOFTDEP(fvp))
+               softdep_change_linkcnt(fip);
+       error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)));
+       if (error)
+               goto bad;
+
        /*
         * 2) If target doesn't exist, link the target
         *    to the source and unlink the source.
@@ -1250,52 +1352,37 @@ abortit:
         *    entry to reference the source inode and
         *    expunge the original entry's existence.
         */
-       if (xp == NULL) {
-               if (dp->i_dev != ip->i_dev)
+       if (tip == NULL) {
+               if (tdp->i_dev != fip->i_dev)
                        panic("ufs_rename: EXDEV");
-               /*
-                * Account for ".." in new directory.
-                * When source and destination have the same
-                * parent we don't fool with the link count.
-                */
                if (doingdirectory && newparent) {
-                       if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+                       /*
+                        * Account for ".." in new directory.
+                        * When source and destination have the same
+                        * parent we don't adjust the link count.  The
+                        * actual link modification is completed when
+                        * .. is rewritten below.
+                        */
+                       if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
                                error = EMLINK;
                                goto bad;
                        }
-                       dp->i_effnlink++;
-                       dp->i_nlink++;
-                       DIP_SET(dp, i_nlink, dp->i_nlink);
-                       dp->i_flag |= IN_CHANGE;
-                       if (DOINGSOFTDEP(tdvp))
-                               softdep_change_linkcnt(dp);
-                       error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
-                                                  DOINGASYNC(tdvp)));
-                       if (error)
-                               goto bad;
                }
-               ufs_makedirentry(ip, tcnp, &newdir);
-               error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
-               if (error) {
-                       if (doingdirectory && newparent) {
-                               dp->i_effnlink--;
-                               dp->i_nlink--;
-                               DIP_SET(dp, i_nlink, dp->i_nlink);
-                               dp->i_flag |= IN_CHANGE;
-                               if (DOINGSOFTDEP(tdvp))
-                                       softdep_change_linkcnt(dp);
-                               (void)UFS_UPDATE(tdvp, 1);
-                       }
+               ufs_makedirentry(fip, tcnp, &newdir);
+               error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
+               if (error)
                        goto bad;
-               }
-               vput(tdvp);
+               /* Setup tdvp for directory compaction if needed. */
+               if (tdp->i_count && tdp->i_endoff &&
+                   tdp->i_endoff < tdp->i_size)
+                       endoff = tdp->i_endoff;
        } else {
-               if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
+               if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev)
                        panic("ufs_rename: EXDEV");
                /*
                 * Short circuit rename(foo, foo).
                 */
-               if (xp->i_number == ip->i_number)
+               if (tip->i_number == fip->i_number)
                        panic("ufs_rename: same file");
                /*
                 * If the parent directory is "sticky", then the caller
@@ -1303,7 +1390,7 @@ abortit:
                 * destination of the rename.  This implements append-only
                 * directories.
                 */
-               if ((dp->i_mode & S_ISTXT) &&
+               if ((tdp->i_mode & S_ISTXT) &&
                    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
                    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
                        error = EPERM;
@@ -1314,9 +1401,9 @@ abortit:
                 * to it. Also, ensure source and target are compatible
                 * (both directories, or both not directories).
                 */
-               if ((xp->i_mode&IFMT) == IFDIR) {
-                       if ((xp->i_effnlink > 2) ||
-                           !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
+               if ((tip->i_mode & IFMT) == IFDIR) {
+                       if ((tip->i_effnlink > 2) ||
+                           !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
                                error = ENOTEMPTY;
                                goto bad;
                        }
@@ -1329,20 +1416,30 @@ abortit:
                        error = EISDIR;
                        goto bad;
                }
-               error = ufs_dirrewrite(dp, xp, ip->i_number,
-                   IFTODT(ip->i_mode),
-                   (doingdirectory && newparent) ? newparent : doingdirectory);
-               if (error)
-                       goto bad;
                if (doingdirectory) {
                        if (!newparent) {
-                               dp->i_effnlink--;
+                               tdp->i_effnlink--;
                                if (DOINGSOFTDEP(tdvp))
-                                       softdep_change_linkcnt(dp);
+                                       softdep_change_linkcnt(tdp);
                        }
-                       xp->i_effnlink--;
+                       tip->i_effnlink--;
                        if (DOINGSOFTDEP(tvp))
-                               softdep_change_linkcnt(xp);
+                               softdep_change_linkcnt(tip);
+               }
+               error = ufs_dirrewrite(tdp, tip, fip->i_number,
+                   IFTODT(fip->i_mode),
+                   (doingdirectory && newparent) ? newparent : doingdirectory);
+               if (error) {
+                       if (doingdirectory) {
+                               if (!newparent) {
+                                       tdp->i_effnlink++;
+                                       if (DOINGSOFTDEP(tdvp))
+                                               softdep_change_linkcnt(tdp);
+                               }
+                               tip->i_effnlink++;
+                               if (DOINGSOFTDEP(tvp))
+                                       softdep_change_linkcnt(tip);
+                       }
                }
                if (doingdirectory && !DOINGSOFTDEP(tvp)) {
                        /*
@@ -1357,115 +1454,107 @@ abortit:
                         * them now.
                         */
                        if (!newparent) {
-                               dp->i_nlink--;
-                               DIP_SET(dp, i_nlink, dp->i_nlink);
-                               dp->i_flag |= IN_CHANGE;
+                               tdp->i_nlink--;
+                               DIP_SET(tdp, i_nlink, tdp->i_nlink);
+                               tdp->i_flag |= IN_CHANGE;
                        }
-                       xp->i_nlink--;
-                       DIP_SET(xp, i_nlink, xp->i_nlink);
-                       xp->i_flag |= IN_CHANGE;
+                       tip->i_nlink--;
+                       DIP_SET(tip, i_nlink, tip->i_nlink);
+                       tip->i_flag |= IN_CHANGE;
                        ioflag = IO_NORMAL;
                        if (!DOINGASYNC(tvp))
                                ioflag |= IO_SYNC;
+                       /* Don't go to bad here as the new link exists. */
                        if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
                            tcnp->cn_cred, tcnp->cn_thread)) != 0)
-                               goto bad;
+                               goto unlockout;
                }
-               vput(tdvp);
-               vput(tvp);
-               xp = NULL;
        }
 
        /*
-        * 3) Unlink the source.
+        * 3) Unlink the source.  We have to resolve the path again to
+        * fixup the directory offset and count for ufs_dirremove.
         */
-       fcnp->cn_flags &= ~MODMASK;
-       fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
-       if ((fcnp->cn_flags & SAVESTART) == 0)
-               panic("ufs_rename: lost from startdir");
-       VREF(fdvp);
-       error = relookup(fdvp, &fvp, fcnp);
-       if (error == 0)
-               vrele(fdvp);
-       if (fvp != NULL) {
-               xp = VTOI(fvp);
-               dp = VTOI(fdvp);
-       } else {
-               /*
-                * From name has disappeared.  IN_RENAME is not sufficient
-                * to protect against directory races due to timing windows,
-                * so we have to remove the panic.  XXX the only real way
-                * to solve this issue is at a much higher level.  By the
-                * time we hit ufs_rename() it's too late.
-                */
-#if 0
-               if (doingdirectory)
-                       panic("ufs_rename: lost dir entry");
-#endif
-               vrele(ap->a_fvp);
-               return (0);
+       if (fdvp == tdvp) {
+               error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
+               if (error)
+                       panic("ufs_rename: from entry went away!");
+               if (ino != fip->i_number)
+                       panic("ufs_rename: ino mismatch %d != %d\n", ino,
+                           fip->i_number);
        }
        /*
-        * Ensure that the directory entry still exists and has not
-        * changed while the new name has been entered. If the source is
-        * a file then the entry may have been unlinked or renamed. In
-        * either case there is no further work to be done. If the source
-        * is a directory then it cannot have been rmdir'ed; the IN_RENAME
-        * flag ensures that it cannot be moved by another rename or removed
-        * by a rmdir.
-        */
-       if (xp != ip) {
-               /*
-                * From name resolves to a different inode.  IN_RENAME is
-                * not sufficient protection against timing window races
-                * so we can't panic here.  XXX the only real way
-                * to solve this issue is at a much higher level.  By the
-                * time we hit ufs_rename() it's too late.
-                */
-#if 0
-               if (doingdirectory)
-                       panic("ufs_rename: lost dir entry");
-#endif
-       } else {
+        * If the source is a directory with a
+        * new parent, the link count of the old
+        * parent directory must be decremented
+        * and ".." set to point to the new parent.
+        */
+       if (doingdirectory && newparent) {
                /*
-                * If the source is a directory with a
-                * new parent, the link count of the old
-                * parent directory must be decremented
-                * and ".." set to point to the new parent.
+                * If tip exists we simply use its link, otherwise we must
+                * add a new one.
                 */
-               if (doingdirectory && newparent) {
-                       xp->i_offset = mastertemplate.dot_reclen;
-                       ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
-                       cache_purge(fdvp);
-               }
-               error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
-               xp->i_flag &= ~IN_RENAME;
-       }
-       if (dp)
-               vput(fdvp);
-       if (xp)
-               vput(fvp);
-       vrele(ap->a_fvp);
+               if (tip == NULL) {
+                       tdp->i_effnlink++;
+                       tdp->i_nlink++;
+                       DIP_SET(tdp, i_nlink, tdp->i_nlink);
+                       tdp->i_flag |= IN_CHANGE;
+                       if (DOINGSOFTDEP(tdvp))
+                               softdep_change_linkcnt(tdp);
+                       error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
+                                                  DOINGASYNC(tdvp)));
+                       /* Don't go to bad here as the new link exists. */
+                       if (error)
+                               goto unlockout;
+               }
+               fip->i_offset = mastertemplate.dot_reclen;
+               ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
+               cache_purge(fdvp);
+       }
+       error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
+
+unlockout:
+       vput(fdvp);
+       vput(fvp);
+       if (tvp)
+               vput(tvp);
+       /*
+        * If compaction or fsync was requested do it now that other locks
+        * are no longer needed.
+        */
+       if (error == 0 && endoff != 0) {
+#ifdef UFS_DIRHASH
+               if (tdp->i_dirhash != NULL)
+                       ufsdirhash_dirtrunc(tdp, endoff);
+#endif
+               UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred,
+                   td);
+       }
+       if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
+               error = VOP_FSYNC(tdvp, MNT_WAIT, td);
+       vput(tdvp);
+       if (mp)
+               vfs_unbusy(mp);
        return (error);
 
 bad:
-       if (xp)
-               vput(ITOV(xp));
-       vput(ITOV(dp));
-out:
-       if (doingdirectory)
-               ip->i_flag &= ~IN_RENAME;
-       if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
-               ip->i_effnlink--;
-               ip->i_nlink--;
-               DIP_SET(ip, i_nlink, ip->i_nlink);
-               ip->i_flag |= IN_CHANGE;
-               ip->i_flag &= ~IN_RENAME;
-               if (DOINGSOFTDEP(fvp))
-                       softdep_change_linkcnt(ip);
-               vput(fvp);
-       } else
-               vrele(fvp);
+       fip->i_effnlink--;
+       fip->i_nlink--;
+       DIP_SET(fip, i_nlink, fip->i_nlink);
+       fip->i_flag |= IN_CHANGE;
+       if (DOINGSOFTDEP(fvp))
+               softdep_change_linkcnt(fip);
+       goto unlockout;
+
+releout:

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to