As recommended by other developers, I have started developing WAPBL support for OpenBSD.
Looking at NetBSD and Bitrig, I made a first functional patch. Index: sbin/mount/mntopts.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sbin/mount/mntopts.h,v retrieving revision 1.16 diff -u -r1.16 mntopts.h --- sbin/mount/mntopts.h 13 Jul 2014 12:01:30 -0000 1.16 +++ sbin/mount/mntopts.h 23 Oct 2015 15:07:07 -0000 @@ -66,6 +66,8 @@ | MFLAG_OPT } #define MOPT_SOFTDEP { "softdep", MNT_SOFTDEP, MFLAG_SET } +#define MOPT_LOG { "log", MNT_LOG, MFLAG_SET } + /* Control flags. */ #define MOPT_FORCE { "force", MNT_FORCE, MFLAG_SET } #define MOPT_UPDATE { "update", MNT_UPDATE, MFLAG_SET } Index: sbin/mount/mount.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sbin/mount/mount.c,v retrieving revision 1.60 diff -u -r1.60 mount.c --- sbin/mount/mount.c 16 Jan 2015 06:39:59 -0000 1.60 +++ sbin/mount/mount.c 23 Oct 2015 15:07:07 -0000 @@ -94,6 +94,7 @@ { MNT_ROOTFS, 1, "root file system", "" }, { MNT_SYNCHRONOUS, 0, "synchronous", "sync" }, { MNT_SOFTDEP, 0, "softdep", "softdep" }, + { MNT_LOG, 0, "log", "log" }, { 0, 0, "", "" } }; Index: sbin/mount_ffs/mount_ffs.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sbin/mount_ffs/mount_ffs.c,v retrieving revision 1.21 diff -u -r1.21 mount_ffs.c --- sbin/mount_ffs/mount_ffs.c 16 Jan 2015 06:39:59 -0000 1.21 +++ sbin/mount_ffs/mount_ffs.c 23 Oct 2015 15:07:07 -0000 @@ -53,6 +53,7 @@ MOPT_RELOAD, MOPT_FORCE, MOPT_SOFTDEP, + MOPT_LOG, { NULL } }; Index: sys/conf/GENERIC =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/conf/GENERIC,v retrieving revision 1.220 diff -u -r1.220 GENERIC --- sys/conf/GENERIC 10 Aug 2015 20:35:36 -0000 1.220 +++ sys/conf/GENERIC 23 Oct 2015 15:07:07 -0000 @@ -43,6 +43,7 @@ option FIFO # FIFOs; RECOMMENDED option TMPFS # efficient memory file system option FUSE # FUSE +option WAPBL # Write Ahead 
Physical Block Logging option SOCKET_SPLICE # Socket Splicing for TCP and UDP option TCP_SACK # Selective Acknowledgements for TCP Index: sys/conf/files =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/conf/files,v retrieving revision 1.604 diff -u -r1.604 files --- sys/conf/files 9 Oct 2015 01:17:21 -0000 1.604 +++ sys/conf/files 23 Oct 2015 15:07:07 -0000 @@ -732,6 +732,7 @@ file kern/vfs_vops.c file kern/vfs_vnops.c file kern/vfs_getcwd.c +file kern/vfs_wapbl.c wapbl file kern/spec_vnops.c file miscfs/deadfs/dead_vnops.c file miscfs/fifofs/fifo_vnops.c fifo @@ -887,6 +888,7 @@ file ufs/ffs/ffs_vfsops.c ffs | mfs file ufs/ffs/ffs_vnops.c ffs | mfs file ufs/ffs/ffs_softdep.c ffs_softupdates +file ufs/ffs/ffs_wapbl.c ffs & wapbl file ufs/mfs/mfs_vfsops.c mfs file ufs/mfs/mfs_vnops.c mfs file ufs/ufs/ufs_bmap.c ffs | mfs | ext2fs @@ -898,6 +900,7 @@ file ufs/ufs/ufs_quota_stub.c ffs | mfs file ufs/ufs/ufs_vfsops.c ffs | mfs | ext2fs file ufs/ufs/ufs_vnops.c ffs | mfs | ext2fs +file ufs/ufs/ufs_wapbl.c ffs & wapbl file ufs/ext2fs/ext2fs_alloc.c ext2fs file ufs/ext2fs/ext2fs_balloc.c ext2fs file ufs/ext2fs/ext2fs_bmap.c ext2fs Index: sys/kern/spec_vnops.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/kern/spec_vnops.c,v retrieving revision 1.83 diff -u -r1.83 spec_vnops.c --- sys/kern/spec_vnops.c 10 Feb 2015 21:56:09 -0000 1.83 +++ sys/kern/spec_vnops.c 23 Oct 2015 15:07:07 -0000 @@ -408,6 +408,10 @@ return (EOPNOTSUPP); } +#ifdef WAPBL +extern int ffs_wapbl_fsync_vfs(struct vnode *, int); +#endif + /* * Synch buffers associated with a block device */ @@ -422,6 +426,15 @@ if (vp->v_type == VCHR) return (0); + + +#ifdef WAPBL + if (vp->v_type == VBLK && + vp->v_specmountpoint != NULL && + vp->v_specmountpoint->mnt_wapbl != NULL) + return (ffs_wapbl_fsync_vfs(vp, ap->a_waitfor)); +#endif + /* * Flush all dirty buffers associated with a block device. 
*/ Index: sys/kern/vfs_bio.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/kern/vfs_bio.c,v retrieving revision 1.170 diff -u -r1.170 vfs_bio.c --- sys/kern/vfs_bio.c 19 Jul 2015 16:21:11 -0000 1.170 +++ sys/kern/vfs_bio.c 23 Oct 2015 15:07:07 -0000 @@ -56,7 +56,7 @@ #include <sys/resourcevar.h> #include <sys/conf.h> #include <sys/kernel.h> -#include <sys/specdev.h> +#include <sys/wapbl.h> #include <uvm/uvm_extern.h> int nobuffers; @@ -77,6 +77,7 @@ struct buf *bio_doread(struct vnode *, daddr_t, int, int); struct buf *buf_get(struct vnode *, daddr_t, size_t); void bread_cluster_callback(struct buf *); +static inline int injournal(struct buf *); struct bcachestats bcstats; /* counters */ long lodirtypages; /* dirty page count low water mark */ @@ -556,6 +557,16 @@ mp = NULL; /* + * If using WAPBL, convert it to a delayed write + */ + if (mp && mp->mnt_wapbl && injournal(bp)) { + if (bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone) { + bdwrite(bp); + return 0; + } + } + + /* * Remember buffer type, to switch on it later. If the write was * synchronous, but the file system was mounted with MNT_ASYNC, * convert it to a delayed write. @@ -628,6 +639,20 @@ return (rv); } +/* + * Consider a buffer for an entry in the (WAPBL) journal. We do not want to log + * regular data blocks. + */ +static inline int +injournal(struct buf *bp) +{ + struct vnode *vp = bp->b_vp; + + if (wapbl_vphaswapbl(vp) && (vp->v_type != VREG || bp->b_lblkno < 0)) + return (1); + + return (0); +} /* * Delayed write. @@ -647,6 +672,20 @@ { int s; + /* If this is a tape block, write the block now. 
*/ + if (major(bp->b_dev) < nblkdev && + bdevsw[major(bp->b_dev)].d_type == D_TAPE) { + bawrite(bp); + return; + } + + if (injournal(bp)) { + struct mount *mp = wapbl_vptomp(bp->b_vp); + + if (bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone) + WAPBL_ADD_BUF(mp, bp); + } + /* * If the block hasn't been seen before: * (1) Mark it as having been seen, @@ -663,13 +702,6 @@ curproc->p_ru.ru_oublock++; /* XXX */ } - /* If this is a tape block, write the block now. */ - if (major(bp->b_dev) < nblkdev && - bdevsw[major(bp->b_dev)].d_type == D_TAPE) { - bawrite(bp); - return; - } - /* Otherwise, the "write" is done, so mark and release the buffer. */ CLR(bp->b_flags, B_NEEDCOMMIT); SET(bp->b_flags, B_DONE); @@ -743,12 +775,28 @@ * Determine which queue the buffer should be on, then put it there. */ + /* If it's locked, don't report an error; try again later */ + if (ISSET(bp->b_flags, (B_LOCKED|B_ERROR)) == (B_LOCKED|B_ERROR)) + CLR(bp->b_flags, B_ERROR); + /* If it's not cacheable, or an error, mark it invalid. */ if (ISSET(bp->b_flags, (B_NOCACHE|B_ERROR))) SET(bp->b_flags, B_INVAL); if (ISSET(bp->b_flags, B_INVAL)) { /* + * If using WAPBL + */ + if (ISSET(bp->b_flags, B_LOCKED)) { + if (wapbl_vphaswapbl(bp->b_vp)) { + struct mount *mp = wapbl_vptomp(bp->b_vp); + KASSERT(bp->b_iodone + != mp->mnt_wapbl_op->wo_wapbl_biodone); + WAPBL_REMOVE_BUF(mp, bp); + } + } + + /* * If the buffer is invalid, free it now rather than leaving * it in a queue and wasting memory. 
*/ @@ -1079,6 +1127,19 @@ if (!ISSET(bp->b_flags, B_DELWRI)) panic("Clean buffer on dirty queue"); #endif + + +#ifdef WAPBL + if (ISSET(bp->b_flags, B_LOCKED) && + wapbl_vphaswapbl(bp->b_vp)) { + brelse(bp); + struct mount *mp = wapbl_vptomp(bp->b_vp); + wapbl_flush(mp->mnt_wapbl, 1); + s = splbio(); + continue; + } +#endif /* WAPBL */ + if (LIST_FIRST(&bp->b_dep) != NULL && !ISSET(bp->b_flags, B_DEFERRED) && buf_countdeps(bp, 0, 0)) { @@ -1206,6 +1267,17 @@ } #endif +void +buf_adjcnt(struct buf *bp, long ncount) +{ + KASSERT(ncount <= bp->b_bufsize); + long ocount = bp->b_bcount; + bp->b_bcount = ncount; + if (injournal(bp)) + WAPBL_RESIZE_BUF(wapbl_vptomp(bp->b_vp), bp, bp->b_bufsize, + ocount); +} + /* bufcache freelist code below */ /* * Copyright (c) 2014 Ted Unangst <t...@openbsd.org> Index: sys/kern/vfs_biomem.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/kern/vfs_biomem.c,v retrieving revision 1.34 diff -u -r1.34 vfs_biomem.c --- sys/kern/vfs_biomem.c 19 Jul 2015 21:21:14 -0000 1.34 +++ sys/kern/vfs_biomem.c 23 Oct 2015 15:07:07 -0000 @@ -89,7 +89,7 @@ { splassert(IPL_BIO); SET(bp->b_flags, B_BUSY); - if (bp->b_data != NULL) { + if (bp->b_data != NULL && !(bp->b_flags & B_LOCKED)) { TAILQ_REMOVE(&buf_valist, bp, b_valist); bcstats.kvaslots_avail--; bcstats.busymapped++; @@ -143,8 +143,11 @@ pmap_update(pmap_kernel()); bp->b_data = (caddr_t)va; } else { - TAILQ_REMOVE(&buf_valist, bp, b_valist); - bcstats.kvaslots_avail--; + if (!(bp->b_flags & B_LOCKED)) { + TAILQ_REMOVE(&buf_valist, bp, b_valist); + bcstats.kvaslots_avail--; + } else + return; } bcstats.busymapped++; @@ -157,7 +160,7 @@ KASSERT(bp->b_flags & B_BUSY); splassert(IPL_BIO); - if (bp->b_data) { + if (bp->b_data && !(bp->b_flags & B_LOCKED)) { bcstats.busymapped--; TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist); bcstats.kvaslots_avail++; @@ -191,6 +194,7 @@ bp->b_data = NULL; if (data) { + KASSERT(!(bp->b_flags & B_LOCKED)); if 
(bp->b_flags & B_BUSY) bcstats.busymapped--; pmap_kremove((vaddr_t)data, bp->b_bufsize); @@ -237,6 +241,7 @@ * buffers read in by bread_cluster */ bp->b_bufsize = newsize; + KASSERT(!(bp->b_flags & B_LOCKED)); } } Index: sys/kern/vfs_init.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/kern/vfs_init.c,v retrieving revision 1.36 diff -u -r1.36 vfs_init.c --- sys/kern/vfs_init.c 14 Mar 2015 03:38:51 -0000 1.36 +++ sys/kern/vfs_init.c 23 Oct 2015 15:07:07 -0000 @@ -42,6 +42,7 @@ #include <sys/namei.h> #include <sys/vnode.h> #include <sys/pool.h> +#include <sys/wapbl.h> struct pool namei_pool; @@ -156,6 +157,10 @@ /* Initialize the vnode name cache. */ nchinit(); +#ifdef WAPBL + wapbl_init(); +#endif + /* * Stop using vfsconf and maxvfsconf as a temporary storage, * set them to their correct values now. Index: sys/kern/vfs_subr.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/kern/vfs_subr.c,v retrieving revision 1.236 diff -u -r1.236 vfs_subr.c --- sys/kern/vfs_subr.c 13 Oct 2015 09:11:48 -0000 1.236 +++ sys/kern/vfs_subr.c 23 Oct 2015 15:07:07 -0000 @@ -62,7 +62,7 @@ #include <sys/syscallargs.h> #include <sys/pool.h> #include <sys/tree.h> -#include <sys/specdev.h> +#include <sys/wapbl.h> #include <netinet/in.h> @@ -921,7 +921,7 @@ void vclean(struct vnode *vp, int flags, struct proc *p) { - int active; + int active, error; /* * Check to see if the vnode is in use. @@ -955,8 +955,15 @@ /* * Clean out any buffers associated with the vnode. */ - if (flags & DOCLOSE) - vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); + if (flags & DOCLOSE) { + error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); + if (error != 0) { + if (wapbl_vphaswapbl(vp)) + WAPBL_DISCARD(wapbl_vptomp(vp)); + error = vinvalbuf(vp, 0, NOCRED, p, 0, 0); + } + KASSERT(error == 0); + } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. 
Note that the @@ -1850,6 +1857,64 @@ return (0); } +/* + * Destroy any in core blocks past the truncation length. + * Called with the underlying vnode locked, which should prevent new dirty + * buffers from being queued. + */ +int +vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) +{ + struct buf *bp, *nbp; + int s, error; + + s = splbio(); +restart: + for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if (bp->b_lblkno < lbn) + continue; + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + error = tsleep(bp, slpflag | (PRIBIO + 1), + "vtruncbuf", slptimeo); + if (error) { + splx(s); + return (error); + } + goto restart; + } + bremfree(bp); + buf_acquire_nomap(bp); + bp->b_flags |= B_INVAL; + brelse(bp); + } + + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if (bp->b_lblkno < lbn) + continue; + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + error = tsleep(bp, slpflag | (PRIBIO + 1), + "vtruncbuf", slptimeo); + if (error) { + splx(s); + return (error); + } + goto restart; + } + bremfree(bp); + buf_acquire_nomap(bp); + bp->b_flags |= B_INVAL; + brelse(bp); + } + + splx(s); + + return (0); +} + void vflushbuf(struct vnode *vp, int sync) { Index: sys/kern/vfs_sync.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/kern/vfs_sync.c,v retrieving revision 1.54 diff -u -r1.54 vfs_sync.c --- sys/kern/vfs_sync.c 14 Mar 2015 03:38:51 -0000 1.54 +++ sys/kern/vfs_sync.c 23 Oct 2015 15:07:07 -0000 @@ -311,6 +311,18 @@ } /* + * Return delay factor appropriate for the given file system. For + * WAPBL we use the sync vnode to burst out metadata updates: sync + * those file systems more frequently. + */ +static inline int +sync_delay(struct mount *mp) +{ + + return mp->mnt_wapbl != NULL ? syncdelay / 3 : syncdelay; +} + +/* * Do a lazy sync of the filesystem. 
*/ int @@ -330,7 +342,7 @@ /* * Move ourselves to the back of the sync list. */ - vn_syncer_add_to_worklist(syncvp, syncdelay); + vn_syncer_add_to_worklist(syncvp, sync_delay(mp)); /* * Walk the list of vnodes pushing all that are dirty and Index: sys/kern/vfs_syscalls.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/kern/vfs_syscalls.c,v retrieving revision 1.232 diff -u -r1.232 vfs_syscalls.c --- sys/kern/vfs_syscalls.c 20 Oct 2015 06:40:00 -0000 1.232 +++ sys/kern/vfs_syscalls.c 23 Oct 2015 15:07:07 -0000 @@ -253,10 +253,10 @@ mp->mnt_flag |= MNT_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME | - MNT_FORCE); + MNT_FORCE | MNT_LOG); mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | - MNT_NOATIME | MNT_FORCE); + MNT_NOATIME | MNT_FORCE | MNT_LOG); /* * Mount the filesystem. */ Index: sys/sys/buf.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/sys/buf.h,v retrieving revision 1.99 diff -u -r1.99 buf.h --- sys/sys/buf.h 19 Jul 2015 16:21:11 -0000 1.99 +++ sys/sys/buf.h 23 Oct 2015 15:07:07 -0000 @@ -144,6 +144,7 @@ LIST_ENTRY(buf) b_list; /* All allocated buffers. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ + LIST_ENTRY(buf) b_wapbllist; /* transaction buffer list */ int cache; /* which cache are we in */ struct proc *b_proc; /* Associated proc; NULL if kernel. */ volatile long b_flags; /* B_* flags. */ @@ -157,6 +158,8 @@ TAILQ_ENTRY(buf) b_valist; /* LRU of va to reuse. 
*/ + void * b_private; /* private data for owner */ + union bufq_data b_bufq; struct bufq *b_bq; /* What bufq this buf is on */ @@ -221,12 +224,14 @@ #define B_COLD 0x01000000 /* buffer is on the cold queue */ #define B_BC 0x02000000 /* buffer is managed by the cache */ #define B_DMA 0x04000000 /* buffer is DMA reachable */ +#define B_LOCKED 0x08000000 /* Locked in core (not reusable). */ #define B_BITS "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \ "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \ "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \ "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \ - "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA" + "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA" \ + "\034LOCKED" /* * This structure describes a clustered I/O. It is stored in the b_saveaddr @@ -254,6 +259,8 @@ /* Flags to low-level allocation routines. */ #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */ +#define B_METAONLY 0x04 /* return indirect block buffer */ +#define B_CONTIG 0x08 /* allocate file contiguously */ struct cluster_info { daddr_t ci_lastr; /* last read (read-ahead) */ @@ -292,6 +299,7 @@ void bufinit(void); void buf_dirty(struct buf *); void buf_undirty(struct buf *); +void buf_adjcnt(struct buf *, long); int bwrite(struct buf *); struct buf *getblk(struct vnode *, daddr_t, int, int, int); struct buf *geteblk(int); Index: sys/sys/dkio.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/sys/dkio.h,v retrieving revision 1.9 diff -u -r1.9 dkio.h --- sys/sys/dkio.h 5 Jun 2011 18:40:33 -0000 1.9 +++ sys/sys/dkio.h 23 Oct 2015 15:07:07 -0000 @@ -83,4 +83,7 @@ #define DIOCMAP _IOWR('d', 119, struct dk_diskmap) +/* sync disk cache */ +#define DIOCCACHESYNC _IOW('d', 118, int) /* sync cache (force?) 
*/ + #endif /* _SYS_DKIO_H_ */ Index: sys/sys/mount.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/sys/mount.h,v retrieving revision 1.121 diff -u -r1.121 mount.h --- sys/sys/mount.h 8 Sep 2014 01:47:06 -0000 1.121 +++ sys/sys/mount.h 23 Oct 2015 15:07:07 -0000 @@ -358,6 +358,11 @@ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ void *mnt_data; /* private data */ + struct wapbl_ops + *mnt_wapbl_op; /* logging ops */ + struct wapbl *mnt_wapbl; /* log info */ + struct wapbl_replay + *mnt_wapbl_replay; /* replay support XXX: what? */ }; /* @@ -396,7 +401,7 @@ /* * Mask of flags that are visible to statfs() */ -#define MNT_VISFLAGMASK 0x0400ffff +#define MNT_VISFLAGMASK 0x1400ffff #define MNT_BITS \ "\010\001RDONLY\002SYNCHRONOUS\003NOEXEC\004NOSUID\005NODEV" \ @@ -413,6 +418,7 @@ #define MNT_WANTRDWR 0x02000000 /* want upgrade to read/write */ #define MNT_SOFTDEP 0x04000000 /* soft dependencies being done */ #define MNT_DOOMED 0x08000000 /* device behind filesystem is gone */ +#define MNT_LOG 0x10000000 /* use logging */ /* * Flags for various system call interfaces. @@ -501,6 +507,51 @@ extern int bufbackoff(struct uvm_constraint_range*, long); /* + * This operations vector is so wapbl can be wrapped into a filesystem lkm. + * XXX Eventually, we want to move this functionality + * down into the filesystems themselves so that this isn't needed. 
+ */ +struct wapbl_ops { + void (*wo_wapbl_discard)(struct wapbl *); + int (*wo_wapbl_replay_isopen)(struct wapbl_replay *); + int (*wo_wapbl_replay_can_read)(struct wapbl_replay *, daddr_t, long); + int (*wo_wapbl_replay_read)(struct wapbl_replay *, void *, daddr_t, + long); + void (*wo_wapbl_add_buf)(struct wapbl *, struct buf *); + void (*wo_wapbl_remove_buf)(struct wapbl *, struct buf *); + void (*wo_wapbl_resize_buf)(struct wapbl *, struct buf *, long, long); + int (*wo_wapbl_begin)(struct wapbl *, const char *, int); + void (*wo_wapbl_end)(struct wapbl *); + void (*wo_wapbl_junlock_assert)(struct wapbl *); + void (*wo_wapbl_biodone)(struct buf *); +}; +#define WAPBL_DISCARD(MP) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_discard)((MP)->mnt_wapbl) +#define WAPBL_REPLAY_ISOPEN(MP) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_replay_isopen)((MP)->mnt_wapbl_replay) +#define WAPBL_REPLAY_CAN_READ(MP, BLK, LEN) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_replay_can_read)((MP)->mnt_wapbl_replay, \ + (BLK), (LEN)) +#define WAPBL_REPLAY_READ(MP, DATA, BLK, LEN) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_replay_read)((MP)->mnt_wapbl_replay, \ + (DATA), (BLK), (LEN)) +#define WAPBL_ADD_BUF(MP, BP) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_add_buf)((MP)->mnt_wapbl, (BP)) +#define WAPBL_REMOVE_BUF(MP, BP) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_remove_buf)((MP)->mnt_wapbl, (BP)) +#define WAPBL_RESIZE_BUF(MP, BP, OLDSZ, OLDCNT) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_resize_buf)((MP)->mnt_wapbl, (BP), \ + (OLDSZ), (OLDCNT)) +#define WAPBL_BEGIN(MP) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_begin)((MP)->mnt_wapbl, \ + __FILE__, __LINE__) +#define WAPBL_END(MP) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_end)((MP)->mnt_wapbl) +#define WAPBL_JUNLOCK_ASSERT(MP) \ + (*(MP)->mnt_wapbl_op->wo_wapbl_junlock_assert)((MP)->mnt_wapbl) + + +/* * Operations supported on mounted file system. 
*/ struct nameidata; Index: sys/sys/specdev.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/sys/specdev.h,v retrieving revision 1.34 diff -u -r1.34 specdev.h --- sys/sys/specdev.h 2 Nov 2013 00:16:31 -0000 1.34 +++ sys/sys/specdev.h 23 Oct 2015 15:07:07 -0000 @@ -37,6 +37,10 @@ * special devices. It is allocated in checkalias and freed * in vgone. */ + +#ifndef _SYS_SPECDEV_H_ +#define _SYS_SPECDEV_H_ + struct specinfo { struct vnode **si_hashchain; struct vnode *si_specnext; @@ -108,3 +112,4 @@ int spec_advlock(void *); #endif /* _KERNEL */ +#endif /* _SYS_SPECDEV_H_ */ Index: sys/sys/stat.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/sys/stat.h,v retrieving revision 1.28 diff -u -r1.28 stat.h --- sys/sys/stat.h 4 Apr 2015 18:06:08 -0000 1.28 +++ sys/sys/stat.h 23 Oct 2015 15:07:07 -0000 @@ -173,6 +173,7 @@ #define SF_ARCHIVED 0x00010000 /* file is archived */ #define SF_IMMUTABLE 0x00020000 /* file may not be changed */ #define SF_APPEND 0x00040000 /* writes to file may only append */ +#define SF_LOG 0x00400000 /* WAPBL log file inode */ #ifdef _KERNEL /* Index: sys/sys/vnode.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/sys/vnode.h,v retrieving revision 1.132 diff -u -r1.132 vnode.h --- sys/sys/vnode.h 7 May 2015 08:53:33 -0000 1.132 +++ sys/sys/vnode.h 23 Oct 2015 15:07:07 -0000 @@ -185,13 +185,14 @@ /* * Flags for ioflag. 
*/ -#define IO_UNIT 0x01 /* do I/O as atomic unit */ -#define IO_APPEND 0x02 /* append write to end */ -#define IO_SYNC 0x04 /* do I/O synchronously */ -#define IO_NODELOCKED 0x08 /* underlying node already locked */ -#define IO_NDELAY 0x10 /* FNDELAY flag set in file table */ -#define IO_NOLIMIT 0x20 /* don't enforce limits on i/o */ -#define IO_NOCACHE 0x40 /* don't cache result of this i/o */ +#define IO_UNIT 0x01 /* I/O as atomic unit */ +#define IO_APPEND 0x02 /* append write to end */ +#define IO_SYNC 0x04 /* do I/O synchronously */ +#define IO_NODELOCKED 0x08 /* underlying node already locked */ +#define IO_NDELAY 0x10 /* FNDELAY flag set in file table */ +#define IO_NOLIMIT 0x20 /* don't enforce limits on i/o */ +#define IO_NOCACHE 0x40 /* don't cache result of this i/o */ +#define IO_JOURNALLOCKED 0x80 /* journal is already locked */ /* * Modes. Some values same as Ixxx entries from inode.h for now. @@ -596,6 +597,7 @@ void vdevgone(int, int, int, enum vtype); int vcount(struct vnode *); int vfinddev(dev_t, enum vtype, struct vnode **); +int vtruncbuf(struct vnode *, daddr_t, int, int); void vflushbuf(struct vnode *, int); int vflush(struct mount *, struct vnode *, int); int vget(struct vnode *, int, struct proc *); Index: sys/ufs/ffs/ffs_alloc.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ffs/ffs_alloc.c,v retrieving revision 1.105 diff -u -r1.105 ffs_alloc.c --- sys/ufs/ffs/ffs_alloc.c 27 Sep 2015 05:25:00 -0000 1.105 +++ sys/ufs/ffs/ffs_alloc.c 23 Oct 2015 15:07:07 -0000 @@ -49,11 +49,13 @@ #include <sys/syslog.h> #include <sys/stdint.h> #include <sys/time.h> +#include <sys/wapbl.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> +#include <ufs/ufs/ufs_wapbl.h> #include <ufs/ffs/fs.h> #include <ufs/ffs/ffs_extern.h> @@ -63,16 +65,19 @@ (fs)->fs_fsmnt, (cp)); \ } while (0) -daddr_t ffs_alloccg(struct inode *, int, daddr_t, 
int); -struct buf * ffs_cgread(struct fs *, struct inode *, int); -daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t); -daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int); +daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int); +struct buf * ffs_cgread(struct fs *, struct vnode *, int); +daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int); +daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int, int); ufsino_t ffs_dirpref(struct inode *); daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int); -daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, - daddr_t (*)(struct inode *, int, daddr_t, int)); -daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int); +daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, int, + daddr_t (*)(struct inode *, int, daddr_t, int, int)); +daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int, int); daddr_t ffs_mapsearch(struct fs *, struct cg *, daddr_t, int); +void ffs_blkfree_subr(struct fs *, struct vnode *, + struct inode *, daddr_t bno, long size); + int ffs1_reallocblks(void *); #ifdef FFS2 @@ -106,7 +111,7 @@ * available block is located. */ int -ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, +ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, int flags, struct ucred *cred, daddr_t *bnp) { static struct timeval fsfull_last; @@ -147,7 +152,7 @@ cg = dtog(fs, bpref); /* Try allocating a block. */ - bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, size, flags, ffs_alloccg); if (bno > 0) { /* allocation successful, update inode data */ DIP_ADD(ip, blocks, btodb(size)); @@ -159,6 +164,12 @@ /* Restore user's disk quota because allocation failed. */ (void) ufs_quota_free_blocks(ip, btodb(size), cred); + if (flags & B_CONTIG) { + /* + * Fail silently -- it's up to our caller to report errors. 
+ */ + return (ENOSPC); + } nospace: if (ratecheck(&fsfull_last, &fserr_interval)) { ffs_fserr(fs, cred->cr_uid, "file system full"); @@ -178,7 +189,7 @@ */ int ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, - int nsize, struct ucred *cred, struct buf **bpp, daddr_t *blknop) + int nsize, int flags, struct ucred *cred, struct buf **bpp, daddr_t *blknop) { static struct timeval fsfull_last; struct fs *fs; @@ -218,7 +229,7 @@ if (bpp != NULL) { if ((error = bread(ITOV(ip), lbprev, fs->fs_bsize, &bp)) != 0) goto error; - bp->b_bcount = osize; + buf_adjcnt(bp, osize); } if ((error = ufs_quota_alloc_blocks(ip, btodb(nsize - osize), cred)) @@ -241,7 +252,7 @@ if (nsize > bp->b_bufsize) panic("ffs_realloccg: small buf"); #endif - bp->b_bcount = nsize; + buf_adjcnt(bp, nsize); bp->b_flags |= B_DONE; memset(bp->b_data + osize, 0, nsize - osize); *bpp = bp; @@ -295,16 +306,29 @@ panic("ffs_realloccg: bad optim"); /* NOTREACHED */ } - bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, request, flags, ffs_alloccg); if (bno <= 0) goto nospace; (void) uvm_vnp_uncache(ITOV(ip)); - if (!DOINGSOFTDEP(ITOV(ip))) - ffs_blkfree(ip, bprev, (long)osize); - if (nsize < request) - ffs_blkfree(ip, bno + numfrags(fs, nsize), - (long)(request - nsize)); + if (ip->i_ump->um_mountp->mnt_wapbl && ITOV(ip)->v_type != VREG) { + UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp, + fsbtodb(fs, bprev), osize); + } else { + if (!DOINGSOFTDEP(ITOV(ip))) + ffs_blkfree(ip, bprev, (long)osize); + } + if (nsize < request) { + if (ip->i_ump->um_mountp->mnt_wapbl && + ITOV(ip)->v_type != VREG) { + UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp, + fsbtodb(fs, (bno + numfrags(fs, nsize))), + request - nsize); + } else { + ffs_blkfree(ip, bno + numfrags(fs, nsize), + (long)(request - nsize)); + } + } DIP_ADD(ip, blocks, btodb(nsize - osize)); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp != NULL) { @@ -313,7 +337,7 @@ if (nsize > 
bp->b_bufsize) panic("ffs_realloccg: small buf 2"); #endif - bp->b_bcount = nsize; + buf_adjcnt(bp, nsize); bp->b_flags |= B_DONE; memset(bp->b_data + osize, 0, nsize - osize); *bpp = bp; @@ -434,7 +458,7 @@ /* * Find the preferred location for the cluster. */ - pref = ffs1_blkpref(ip, start_lbn, soff, sbap); + pref = ffs1_blkpref(ip, start_lbn, soff, 0, sbap); /* * If the block range spans two block maps, get the second map. */ @@ -454,7 +478,7 @@ /* * Search the block map looking for an allocation of the desired size. */ - if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, len, + if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, len, 0, ffs_clusteralloc)) == 0) goto fail; /* @@ -538,10 +562,17 @@ printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { + if (ip->i_ump->um_mountp->mnt_wapbl && + ITOV(ip)->v_type != VREG) { + UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize); + } else { if (!DOINGSOFTDEP(vp)) ffs_blkfree(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), + buflist->bs_children[i]->b_blkno, fs->fs_bsize); + } buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef DIAGNOSTIC if (!ffs_checkblk(ip, @@ -660,13 +691,13 @@ /* * Find the preferred location for the cluster. */ - pref = ffs2_blkpref(ip, start_lbn, soff, sbap); + pref = ffs2_blkpref(ip, start_lbn, soff, 0, sbap); /* * Search the block map looking for an allocation of the desired size. 
*/ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) + len, 0, ffs_clusteralloc)) == 0) goto fail; /* @@ -753,9 +784,16 @@ printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { - if (!DOINGSOFTDEP(vp)) - ffs_blkfree(ip, dbtofsb(fs, - buflist->bs_children[i]->b_blkno), fs->fs_bsize); + if (ip->i_ump->um_mountp->mnt_wapbl && + ITOV(ip)->v_type != VREG) { + UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp, + buflist->bs_children[i]->b_blkno, fs->fs_bsize); + } else { + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ip, dbtofsb(fs, + buflist->bs_children[i]->b_blkno), + fs->fs_bsize); + } buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef DIAGNOSTIC if (!ffs_checkblk(ip, @@ -831,8 +869,15 @@ ufsino_t ino, ipref; int cg, error; + UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount); + *vpp = NULL; fs = pip->i_fs; + + error = UFS_WAPBL_BEGIN(pvp->v_mount); + if (error) + return error; + if (fs->fs_cstotal.cs_nifree == 0) goto noinodes; @@ -855,12 +900,18 @@ if (fs->fs_contigdirs[cg] > 0) fs->fs_contigdirs[cg]--; } - ino = (ufsino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg); + ino = (ufsino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, ffs_nodealloccg); if (ino == 0) goto noinodes; + UFS_WAPBL_END(pvp->v_mount); error = VFS_VGET(pvp->v_mount, ino, vpp); if (error) { - ffs_inode_free(pip, ino, mode); + int err; + err = UFS_WAPBL_BEGIN(pvp->v_mount); + if (err == 0) { + ffs_inode_free(pip, ino, mode); + UFS_WAPBL_END(pvp->v_mount); + } return (error); } @@ -896,6 +947,7 @@ return (0); noinodes: + UFS_WAPBL_END(pvp->v_mount); if (ratecheck(&fsnoinodes_last, &fserr_interval)) { ffs_fserr(fs, cred->cr_uid, "out of inodes"); uprintf("\n%s: create/symlink failed, no inodes free\n", @@ -1060,7 +1112,7 @@ * allocated. 
*/ int32_t -ffs1_blkpref(struct inode *ip, daddr_t lbn, int indx, int32_t *bap) +ffs1_blkpref(struct inode *ip, daddr_t lbn, int indx, int flags, int32_t *bap) { struct fs *fs; int cg, inocg, avgbfree, startcg; @@ -1068,6 +1120,26 @@ KASSERT(indx <= 0 || bap != NULL); fs = ip->i_fs; + + /* + * If allocating a contiguous file with B_CONTIG, use the hints + * in the inode extentions to return the desired block. + * + * For metadata (indirect blocks) return the address of where + * the first indirect block resides - we'll scan for the next + * available slot if we need to allocate more than one indirect + * block. For data, return the address of the actual block + * relative to the address of the first data block. + */ + if (flags & B_CONTIG) { + KASSERT(ip->i_ffs_first_data_blk != 0); + KASSERT(ip->i_ffs_first_indir_blk != 0); + if (flags & B_METAONLY) + return ip->i_ffs_first_indir_blk; + else + return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn); + } + /* * Allocation of indirect blocks is indicated by passing negative * values in indx: -1 for single indirect, -2 for double indirect, @@ -1160,7 +1232,7 @@ */ #ifdef FFS2 int64_t -ffs2_blkpref(struct inode *ip, daddr_t lbn, int indx, int64_t *bap) +ffs2_blkpref(struct inode *ip, daddr_t lbn, int indx, int flags, int64_t *bap) { struct fs *fs; int cg, inocg, avgbfree, startcg; @@ -1168,6 +1240,26 @@ KASSERT(indx <= 0 || bap != NULL); fs = ip->i_fs; + + /* + * If allocating a contiguous file with B_CONTIG, use the hints + * in the inode extentions to return the desired block. + * + * For metadata (indirect blocks) return the address of where + * the first indirect block resides - we'll scan for the next + * available slot if we need to allocate more than one indirect + * block. For data, return the address of the actual block + * relative to the address of the first data block. 
+ */ + if (flags & B_CONTIG) { + KASSERT(ip->i_ffs_first_data_blk != 0); + KASSERT(ip->i_ffs_first_indir_blk != 0); + if (flags & B_METAONLY) + return ip->i_ffs_first_indir_blk; + else + return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn); + } + /* * Allocation of indirect blocks is indicated by passing negative * values in indx: -1 for single indirect, -2 for double indirect, @@ -1267,8 +1359,8 @@ * 3) brute force search for a free block. */ daddr_t -ffs_hashalloc(struct inode *ip, int cg, daddr_t pref, int size, - daddr_t (*allocator)(struct inode *, int, daddr_t, int)) +ffs_hashalloc(struct inode *ip, int cg, daddr_t pref, int size, int flags, + daddr_t (*allocator)(struct inode *, int, daddr_t, int, int)) { struct fs *fs; daddr_t result; @@ -1278,9 +1370,13 @@ /* * 1: preferred cylinder group */ - result = (*allocator)(ip, cg, pref, size); + result = (*allocator)(ip, cg, pref, size, flags); if (result) return (result); + + if (flags & B_CONTIG) + return (result); + /* * 2: quadratic rehash */ @@ -1288,7 +1384,7 @@ cg += i; if (cg >= fs->fs_ncg) cg -= fs->fs_ncg; - result = (*allocator)(ip, cg, 0, size); + result = (*allocator)(ip, cg, 0, size, flags); if (result) return (result); } @@ -1299,7 +1395,7 @@ */ cg = (icg + 2) % fs->fs_ncg; for (i = 2; i < fs->fs_ncg; i++) { - result = (*allocator)(ip, cg, 0, size); + result = (*allocator)(ip, cg, 0, size, flags); if (result) return (result); cg++; @@ -1310,11 +1406,11 @@ } struct buf * -ffs_cgread(struct fs *fs, struct inode *ip, int cg) +ffs_cgread(struct fs *fs, struct vnode *devvp, int cg) { struct buf *bp; - if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + if (bread(devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, &bp)) { brelse(bp); return (NULL); @@ -1353,7 +1449,7 @@ return (0); } - if (!(bp = ffs_cgread(fs, ip, cg))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1398,7 +1494,7 @@ * and if it is, allocate it. 
*/ daddr_t -ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) +ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size, int flags) { struct fs *fs; struct cg *cgp; @@ -1410,7 +1506,7 @@ if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) return (0); - if (!(bp = ffs_cgread(fs, ip, cg))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1423,7 +1519,7 @@ if (size == fs->fs_bsize) { /* allocate and return a complete data block */ - bno = ffs_alloccgblk(ip, bp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref, flags); bdwrite(bp); return (bno); } @@ -1445,7 +1541,7 @@ brelse(bp); return (0); } - bno = ffs_alloccgblk(ip, bp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref, flags); bpref = dtogd(fs, bno); for (i = frags; i < fs->fs_frag; i++) setbit(cg_blksfree(cgp), bpref + i); @@ -1487,7 +1583,7 @@ * blocks may be fragmented by the routine that allocates them. */ daddr_t -ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref) +ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref, int flags) { struct fs *fs; struct cg *cgp; @@ -1515,6 +1611,12 @@ if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) goto gotit; /* + * if the requested data block isn't available and we are trying to + * allocate a contiguous file, return an error. + */ + if ((flags & (B_CONTIG | B_METAONLY)) == B_CONTIG) + return (0); + /* * Take the next available block in this cylinder group. */ bno = ffs_mapsearch(fs, cgp, bpref, (int) fs->fs_frag); @@ -1556,7 +1658,7 @@ * take the first one that we find following bpref. 
*/ daddr_t -ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) +ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len, int flags) { struct fs *fs; struct cg *cgp; @@ -1569,7 +1671,7 @@ if (fs->fs_maxcluster[cg] < len) return (0); - if (!(bp = ffs_cgread(fs, ip, cg))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1651,7 +1753,7 @@ len = blkstofrags(fs, len); for (i = 0; i < len; i += fs->fs_frag) - if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) + if (ffs_alloccgblk(ip, bp, bno + i, flags) != bno + i) panic("ffs_clusteralloc: lost block"); bdwrite(bp); return (bno); @@ -1663,7 +1765,7 @@ /* inode allocation routine */ daddr_t -ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) +ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode, int flags) { struct fs *fs; struct cg *cgp; @@ -1680,10 +1782,11 @@ * and in the cylinder group itself. */ fs = ip->i_fs; + UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp); if (fs->fs_cs(fs, cg).cs_nifree == 0) return (0); - if (!(bp = ffs_cgread(fs, ip, cg))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1758,6 +1861,9 @@ gotit: + UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref, + mode); + #ifdef FFS2 /* * For FFS2, check if all inodes in this cylinder group have been used @@ -1819,6 +1925,119 @@ } /* + * Allocate a block or fragment. + * + * The specified block or fragment is removed from the + * free map, possibly fragmenting a block in the process. 
+ * + * This implementation should mirror ffs_blkfree + */ +int +ffs_blkalloc_ump(struct ufsmount *ump, daddr_t bno, long size) +{ + struct fs *fs = ump->um_fs; + struct cg *cgp; + struct buf *bp; + int32_t fragno, cgbno; + int i, error, cg, blk, frags, bbase; + u_int8_t *blksfree; + + KASSERT((u_int)size <= fs->fs_bsize && fragoff(fs, size) == 0 && + fragnum(fs, bno) + numfrags(fs, size) <= fs->fs_frag); + KASSERT(bno < fs->fs_size); + + cg = dtog(fs, bno); + error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, &bp); + if (error) { + brelse(bp); + return error; + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return EIO; + } + cgp->cg_ffs2_time = time_second; + cgp->cg_time = (int32_t)cgp->cg_ffs2_time; + cgbno = dtogd(fs, bno); + blksfree = cg_blksfree(cgp); + + if (size == fs->fs_bsize) { + fragno = fragstoblks(fs, cgbno); + if (!ffs_isblock(fs, blksfree, fragno)) { + brelse(bp); + return EBUSY; + } + ffs_clrblock(fs, blksfree, fragno); + ffs_clusteracct(fs, cgp, fragno, -1); + cgp->cg_cs.cs_nbfree--; + fs->fs_cstotal.cs_nbfree--; + fs->fs_cs(fs, cg).cs_nbfree--; + } else { + bbase = cgbno - fragnum(fs, cgbno); + + frags = numfrags(fs, size); + for (i = 0; i < frags; i++) { + if (isclr(blksfree, cgbno + i)) { + brelse(bp); + return EBUSY; + } + } + /* + * if a complete block is being split, account for it + */ + fragno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, blksfree, fragno)) { + cgp->cg_cs.cs_nffree += fs->fs_frag; + fs->fs_cstotal.cs_nffree += fs->fs_frag; + fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag; + ffs_clusteracct(fs, cgp, fragno, -1); + cgp->cg_cs.cs_nbfree--; + fs->fs_cstotal.cs_nbfree--; + fs->fs_cs(fs, cg).cs_nbfree--; + } + /* + * decrement the counts associated with the old frags + */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* + * allocate the fragment + */ + for (i = 0; i < frags; i++) { + clrbit(blksfree, cgbno + i); + } +
cgp->cg_cs.cs_nffree -= i; + fs->fs_cstotal.cs_nffree -= i; + fs->fs_cs(fs, cg).cs_nffree -= i; + /* + * add back in counts associated with the new frags + */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + } + fs->fs_fmod = 1; + bdwrite(bp); + return 0; +} + +#ifdef WAPBL +void +ffs_wapbl_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, + long size) +{ + ffs_blkfree_subr(fs, devvp, NULL, bno, size); +} +#endif /* WAPBL */ + +void +ffs_blkfree(struct inode *ip, daddr_t bno, long size) +{ + ffs_blkfree_subr(NULL, NULL, ip, bno, size); +} + +/* * Free a block or fragment. * * The specified block or fragment is placed back in the @@ -1826,29 +2045,44 @@ * block reassembly is checked. */ void -ffs_blkfree(struct inode *ip, daddr_t bno, long size) +ffs_blkfree_subr(struct fs *fs, struct vnode *devvp, struct inode *ip, + daddr_t bno, long size) { - struct fs *fs; struct cg *cgp; struct buf *bp; daddr_t blkno; int i, cg, blk, frags, bbase; - fs = ip->i_fs; + KASSERT((fs != NULL && devvp != NULL) ^ (ip != NULL)); + + if (fs == NULL) { + fs = ip->i_fs; + devvp = ip->i_devvp; + } + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { - printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n", - ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + if (ip == NULL) + printf("bsize = %d, size = %ld, fs = %s\n", + fs->fs_bsize, size, fs->fs_fsmnt); + else + printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n", + ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_blkfree: bad size"); } cg = dtog(fs, bno); if ((u_int)bno >= fs->fs_size) { - printf("bad block %lld, ino %u\n", (long long)bno, - ip->i_number); - ffs_fserr(fs, DIP(ip, uid), "bad block"); + if (ip == NULL) { + printf("bad block %lld\n", (long long)bno); + ffs_fserr(fs, 0, "bad block"); + } else { + printf("bad block %lld, ino %u\n", (long long)bno, + ip->i_number); + ffs_fserr(fs, DIP(ip, uid), "bad block"); + } 
return; } - if (!(bp = ffs_cgread(fs, ip, cg))) + if (!(bp = ffs_cgread(fs, devvp, cg))) return; cgp = (struct cg *)bp->b_data; @@ -1858,8 +2092,12 @@ if (size == fs->fs_bsize) { blkno = fragstoblks(fs, bno); if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) { - printf("dev = 0x%x, block = %lld, fs = %s\n", - ip->i_dev, (long long)bno, fs->fs_fsmnt); + if (ip == NULL) + printf("block = %lld, fs = %s\n", + (long long)bno, fs->fs_fsmnt); + else + printf("dev = 0x%x, block = %lld, fs = %s\n", + ip->i_dev, (long long)bno, fs->fs_fsmnt); panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, cg_blksfree(cgp), blkno); @@ -1887,9 +2125,13 @@ frags = numfrags(fs, size); for (i = 0; i < frags; i++) { if (isset(cg_blksfree(cgp), bno + i)) { - printf("dev = 0x%x, block = %lld, fs = %s\n", - ip->i_dev, (long long)(bno + i), - fs->fs_fsmnt); + if (ip == NULL) + printf("block = %lld, fs = %s\n", + (long long)(bno + i), fs->fs_fsmnt); + else + printf("dev = 0x%x, block = %lld, " + "fs = %s\n", ip->i_dev, + (long long)(bno + i), fs->fs_fsmnt); panic("ffs_blkfree: freeing free frag"); } setbit(cg_blksfree(cgp), bno + i); @@ -1957,7 +2199,7 @@ pip->i_dev, ino, fs->fs_fsmnt); cg = ino_to_cg(fs, ino); - if (!(bp = ffs_cgread(fs, pip, cg))) + if (!(bp = ffs_cgread(fs, pip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1971,6 +2213,8 @@ panic("ffs_freefile: freeing free inode"); } clrbit(cg_inosused(cgp), ino); + UFS_WAPBL_UNREGISTER_INODE(pip->i_ump->um_mountp, + ino + cg * fs->fs_ipg, mode); if (ino < cgp->cg_irotor) cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; @@ -2008,7 +2252,7 @@ if ((u_int)bno >= fs->fs_size) panic("ffs_checkblk: bad block %lld", (long long)bno); - if (!(bp = ffs_cgread(fs, ip, dtog(fs, bno)))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, dtog(fs, bno)))) return (0); cgp = (struct cg *)bp->b_data; Index: sys/ufs/ffs/ffs_balloc.c =================================================================== RCS file: 
/Volumes/CSP/cvs/src/sys/ufs/ffs/ffs_balloc.c,v retrieving revision 1.43 diff -u -r1.43 ffs_balloc.c --- sys/ufs/ffs/ffs_balloc.c 14 Mar 2015 03:38:52 -0000 1.43 +++ sys/ufs/ffs/ffs_balloc.c 23 Oct 2015 15:07:07 -0000 @@ -103,8 +103,9 @@ osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, - ffs1_blkpref(ip, nb, (int)nb, &ip->i_ffs1_db[0]), - osize, (int)fs->fs_bsize, cred, bpp, &newb); + ffs1_blkpref(ip, nb, (int)nb, flags, + &ip->i_ffs1_db[0]), osize, (int)fs->fs_bsize, + flags, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) @@ -165,7 +166,7 @@ brelse(*bpp); return (error); } - (*bpp)->b_bcount = osize; + buf_adjcnt((*bpp), osize); } return (0); } else { @@ -174,9 +175,9 @@ * want, grow it. */ error = ffs_realloccg(ip, lbn, - ffs1_blkpref(ip, lbn, (int)lbn, + ffs1_blkpref(ip, lbn, (int)lbn, flags, &ip->i_ffs1_db[0]), - osize, nsize, cred, bpp, &newb); + osize, nsize, flags, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) @@ -195,8 +196,8 @@ else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, - ffs1_blkpref(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]), - nsize, cred, &newb); + ffs1_blkpref(ip, lbn, (int)lbn, flags, + &ip->i_ffs1_db[0]), nsize, flags, cred, &newb); if (error) return (error); if (bpp != NULL) { @@ -235,9 +236,10 @@ allocib = NULL; allocblk = allociblk; if (nb == 0) { - pref = ffs1_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL); + pref = ffs1_blkpref(ip, lbn, -indirs[0].in_off - 1, + flags | B_METAONLY, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - cred, &newb); + flags | B_METAONLY, cred, &newb); if (error) goto fail; nb = newb; @@ -283,9 +285,10 @@ continue; } if (pref == 0) - pref = ffs1_blkpref(ip, lbn, i - num - 1, NULL); - error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, - &newb); + pref = ffs1_blkpref(ip, lbn, i - num - 1, + flags | B_METAONLY, NULL); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | B_METAONLY, cred, &newb); 
if (error) { brelse(bp); goto fail; @@ -323,13 +326,20 @@ bdwrite(bp); } } + + if (flags & B_METAONLY) { + KASSERT(bpp != NULL); + *bpp = bp; + return (0); + } + /* * Get the data block, allocating if necessary. */ if (nb == 0) { - pref = ffs1_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); - error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, - &newb); + pref = ffs1_blkpref(ip, lbn, indirs[i].in_off, flags, &bap[0]); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, + cred, &newb); if (error) { brelse(bp); goto fail; @@ -468,8 +478,8 @@ osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, ffs2_blkpref(ip, - lastlbn, nb, &ip->i_ffs2_db[0]), osize, - (int) fs->fs_bsize, cred, bpp, &newb); + lastlbn, nb, flags, &ip->i_ffs2_db[0]), osize, + (int) fs->fs_bsize, flags, cred, bpp, &newb); if (error) return (error); @@ -535,7 +545,7 @@ brelse(*bpp); return (error); } - (*bpp)->b_bcount = osize; + buf_adjcnt((*bpp), osize); } return (0); @@ -545,9 +555,9 @@ * grow it. 
*/ error = ffs_realloccg(ip, lbn, - ffs2_blkpref(ip, lbn, (int) lbn, - &ip->i_ffs2_db[0]), osize, nsize, cred, - bpp, &newb); + ffs2_blkpref(ip, lbn, (int) lbn, flags, + &ip->i_ffs2_db[0]), osize, nsize, flags, + cred, bpp, &newb); if (error) return (error); @@ -567,7 +577,8 @@ nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs2_blkpref(ip, lbn, - (int) lbn, &ip->i_ffs2_db[0]), nsize, cred, &newb); + (int) lbn, flags, &ip->i_ffs2_db[0]), nsize, flags, + cred, &newb); if (error) return (error); @@ -614,9 +625,10 @@ allocblk = allociblk; if (nb == 0) { - pref = ffs2_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL); - error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred, - &newb); + pref = ffs2_blkpref(ip, lbn, -indirs[0].in_off - 1, + flags | B_METAONLY, NULL); + error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, + flags | B_METAONLY, cred, &newb); if (error) goto fail; @@ -670,10 +682,11 @@ } if (pref == 0) - pref = ffs2_blkpref(ip, lbn, i - num - 1, NULL); + pref = ffs2_blkpref(ip, lbn, i - num - 1, + flags | B_METAONLY, NULL); - error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred, - &newb); + error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, + flags | B_METAONLY, cred, &newb); if (error) { brelse(bp); goto fail; @@ -716,14 +729,21 @@ bdwrite(bp); } + if (flags & B_METAONLY) { + KASSERT(bpp != NULL); + *bpp = bp; + return (0); + } + /* * Get the data block, allocating if necessary. 
*/ if (nb == 0) { - pref = ffs2_blkpref(ip, lbn, indirs[num].in_off, &bap[0]); + pref = ffs2_blkpref(ip, lbn, indirs[num].in_off, flags, + &bap[0]); - error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, - &newb); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, + cred, &newb); if (error) { brelse(bp); goto fail; Index: sys/ufs/ffs/ffs_extern.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ffs/ffs_extern.h,v retrieving revision 1.40 diff -u -r1.40 ffs_extern.h --- sys/ufs/ffs/ffs_extern.h 25 Jan 2014 23:31:12 -0000 1.40 +++ sys/ufs/ffs/ffs_extern.h 23 Oct 2015 15:07:07 -0000 @@ -99,19 +99,20 @@ extern struct vops ffs_fifovops; /* ffs_alloc.c */ -int ffs_alloc(struct inode *, daddr_t, daddr_t , int, struct ucred *, - daddr_t *); -int ffs_realloccg(struct inode *, daddr_t, daddr_t, int, int , - struct ucred *, struct buf **, daddr_t *); +int ffs_alloc(struct inode *, daddr_t, daddr_t , int, int, struct ucred *, + daddr_t *); +int ffs_realloccg(struct inode *, daddr_t, daddr_t, int, int, int, + struct ucred *, struct buf **, daddr_t *); int ffs_reallocblks(void *); int ffs_inode_alloc(struct inode *, mode_t, struct ucred *, struct vnode **); int ffs_inode_free(struct inode *, ufsino_t, mode_t); int ffs_freefile(struct inode *, ufsino_t, mode_t); -int32_t ffs1_blkpref(struct inode *, daddr_t, int, int32_t *); +int32_t ffs1_blkpref(struct inode *, daddr_t, int, int, int32_t *); #ifdef FFS2 -int64_t ffs2_blkpref(struct inode *, daddr_t, int, int64_t *); +int64_t ffs2_blkpref(struct inode *, daddr_t, int, int, int64_t *); #endif +int ffs_blkalloc_ump(struct ufsmount *, daddr_t, long); void ffs_blkfree(struct inode *, daddr_t, long); void ffs_clusteracct(struct fs *, struct cg *, daddr_t, int); @@ -160,6 +161,18 @@ int ffs_reclaim(void *); int ffsfifo_reclaim(void *); +/* ffs_wapbl.c -- write ahead physical block logging */ +void ffs_wapbl_verify_inodes(struct mount *, const char *); +void 
ffs_wapbl_replay_finish(struct mount *); +int ffs_wapbl_start(struct mount *); +int ffs_wapbl_stop(struct mount *, int); +int ffs_wapbl_replay_start(struct mount *, struct fs *, struct vnode *); +void ffs_wapbl_blkalloc(struct fs *, struct vnode *, daddr_t, int); + +void ffs_wapbl_sync_metadata(struct mount *, daddr_t *, int *, int); +void ffs_wapbl_abort_sync_metadata(struct mount *, daddr_t *, int *, int); +void ffs_wapbl_blkfree(struct fs *, struct vnode *, daddr_t, long); + /* * Soft dependency function prototypes. */ Index: sys/ufs/ffs/ffs_inode.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ffs/ffs_inode.c,v retrieving revision 1.74 diff -u -r1.74 ffs_inode.c --- sys/ufs/ffs/ffs_inode.c 14 Mar 2015 03:38:52 -0000 1.74 +++ sys/ufs/ffs/ffs_inode.c 23 Oct 2015 15:07:07 -0000 @@ -41,11 +41,13 @@ #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/resourcevar.h> +#include <sys/wapbl.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> +#include <ufs/ufs/ufs_wapbl.h> #include <ufs/ffs/fs.h> #include <ufs/ffs/ffs_extern.h> @@ -95,6 +97,16 @@ return (error); } + if (DIP(ip, mode)) { + if (DIP(ip, nlink) > 0) { + UFS_WAPBL_UNREGISTER_INODE(ip->i_ump->um_mountp, + ip->i_number, DIP(ip, mode)); + } else { + UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, + ip->i_number, DIP(ip, mode)); + } + } + if (DOINGSOFTDEP(vp)) softdep_update_inodeblock(ip, bp, waitfor); else if (ip->i_effnlink != DIP(ip, nlink)) @@ -135,7 +147,7 @@ struct fs *fs; struct buf *bp; int offset, size, level; - long count, nblocks, vflags, blocksreleased = 0; + long count, nblocks, blocksreleased = 0; int i, aflags, error, allerror; off_t osize; @@ -262,7 +274,7 @@ (void) uvm_vnp_uncache(ovp); if (ovp->v_type != VDIR) memset(bp->b_data + offset, 0, size - offset); - bp->b_bcount = size; + buf_adjcnt(bp, size); if (aflags & B_SYNC) bwrite(bp); else @@ -321,8 +333,7 @@ 
} DIP_ASSIGN(oip, size, osize); - vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; - allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0); + allerror = vtruncbuf(ovp, lastblock + 1, 0, 0); /* * Indirect blocks first. @@ -340,7 +351,12 @@ blocksreleased += count; if (lastiblock[level] < 0) { DIP_ASSIGN(oip, ib[level], 0); - ffs_blkfree(oip, bn, fs->fs_bsize); + if (oip->i_ump->um_mountp->mnt_wapbl) { + UFS_WAPBL_REGISTER_DEALLOCATION( + oip->i_ump->um_mountp, + fsbtodb(fs, bn), fs->fs_bsize); + } else + ffs_blkfree(oip, bn, fs->fs_bsize); blocksreleased += nblocks; } } @@ -360,7 +376,12 @@ DIP_ASSIGN(oip, db[i], 0); bsize = blksize(fs, oip, i); - ffs_blkfree(oip, bn, bsize); + if ((oip->i_ump->um_mountp->mnt_wapbl) && + (ovp->v_type != VREG)) { + UFS_WAPBL_REGISTER_DEALLOCATION(oip->i_ump->um_mountp, + fsbtodb(fs, bn), bsize); + } else + ffs_blkfree(oip, bn, bsize); blocksreleased += btodb(bsize); } if (lastblock < 0) @@ -390,7 +411,13 @@ * required for the storage we're keeping. */ bn += numfrags(fs, newspace); - ffs_blkfree(oip, bn, oldspace - newspace); + if ((oip->i_ump->um_mountp->mnt_wapbl) && + (ovp->v_type != VREG)) { + UFS_WAPBL_REGISTER_DEALLOCATION( + oip->i_ump->um_mountp, fsbtodb(fs, bn), + oldspace - newspace); + } else + ffs_blkfree(oip, bn, oldspace - newspace); blocksreleased += btodb(oldspace - newspace); } } @@ -412,6 +439,7 @@ else /* sanity */ DIP_ASSIGN(oip, blocks, 0); oip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(oip, 0); (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); return (allerror); } @@ -541,7 +569,12 @@ allerror = error; blocksreleased += blkcount; } - ffs_blkfree(ip, nb, fs->fs_bsize); + if (ip->i_ump->um_mountp->mnt_wapbl && + ((level > SINGLE) || ITOV(ip)->v_type != VREG)) { + UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp, + fsbtodb(fs, nb), fs->fs_bsize); + } else + ffs_blkfree(ip, nb, fs->fs_bsize); blocksreleased += nblocks; } Index: sys/ufs/ffs/ffs_subr.c 
=================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ffs/ffs_subr.c,v retrieving revision 1.29 diff -u -r1.29 ffs_subr.c --- sys/ufs/ffs/ffs_subr.c 2 Nov 2013 00:08:17 -0000 1.29 +++ sys/ufs/ffs/ffs_subr.c 23 Oct 2015 15:07:07 -0000 @@ -72,7 +72,7 @@ brelse(bp); return (error); } - bp->b_bcount = bsize; + buf_adjcnt(bp, bsize); if (res) *res = (char *)bp->b_data + blkoff(fs, offset); *bpp = bp; Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ffs/ffs_vfsops.c,v retrieving revision 1.149 diff -u -r1.149 ffs_vfsops.c --- sys/ufs/ffs/ffs_vfsops.c 14 Mar 2015 03:38:52 -0000 1.149 +++ sys/ufs/ffs/ffs_vfsops.c 23 Oct 2015 15:07:07 -0000 @@ -50,7 +50,7 @@ #include <sys/pool.h> #include <sys/dkio.h> #include <sys/disk.h> -#include <sys/specdev.h> +#include <sys/wapbl.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/ufsmount.h> @@ -58,6 +58,7 @@ #include <ufs/ufs/dir.h> #include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/dirhash.h> +#include <ufs/ufs/ufs_wapbl.h> #include <ufs/ffs/fs.h> #include <ufs/ffs/ffs_extern.h> @@ -178,6 +179,15 @@ if (error) return (error); +#ifdef WAPBL + /* WAPBL can only be enabled on a r/w mount. */ + if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_flag & MNT_WANTRDWR)) { + mp->mnt_flag &= ~MNT_LOG; + } +#else /* !WAPBL */ + mp->mnt_flag &= ~MNT_LOG; +#endif /* !WAPBL */ + #ifndef FFS_SOFTUPDATES if (mp->mnt_flag & MNT_SOFTDEP) { printf("WARNING: soft updates isn't compiled in\n"); @@ -194,6 +204,16 @@ (MNT_SOFTDEP | MNT_ASYNC)) { return (EINVAL); } + +#ifdef WAPBL + /* + * Likewise, WAPBL is incompatible with MNT_ASYNC and MNT_SOFTDEP. + */ + if (mp->mnt_flag & MNT_LOG) + if (mp->mnt_flag & (MNT_ASYNC|MNT_SOFTDEP)) + return (EINVAL); +#endif /* WAPBL */ + /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. 
@@ -220,11 +240,30 @@ if (fs->fs_flags & FS_DOSOFTDEP) { error = softdep_flushfiles(mp, flags, p); mp->mnt_flag &= ~MNT_SOFTDEP; - } else + } else { error = ffs_flushfiles(mp, flags, p); + if (error == 0) + error = UFS_WAPBL_BEGIN(mp); + if (error == 0 && + ffs_cgupdate(ump, MNT_WAIT) == 0 && + fs->fs_clean & FS_WASCLEAN) { + fs->fs_clean = FS_ISCLEAN; + (void) ffs_sbupdate(ump, MNT_WAIT); + } + if (error == 0) + UFS_WAPBL_END(mp); + } ronly = 1; } +#ifdef WAPBL + if (error == 0 && (mp->mnt_flag & MNT_LOG) == 0) { + error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE); + if (error) + return (error); + } +#endif /* WAPBL */ + /* * Flush soft dependencies if disabling it via an update * mount. This may leave some items to be processed, @@ -277,6 +316,31 @@ goto error_1; } + fs->fs_contigdirs = malloc((u_long)fs->fs_ncg, + M_UFSMNT, M_WAITOK|M_ZERO); + +#ifdef WAPBL + if (mp->mnt_flag & MNT_LOG) { + fs->fs_ronly = 0; + fs->fs_fmod = 1; + } + + if (fs->fs_flags & FS_DOWAPBL) { + printf("%s: replaying log to disk\n", + mp->mnt_stat.f_mntonname); + KASSERT(mp->mnt_wapbl_replay); + error = wapbl_replay_write(mp->mnt_wapbl_replay, + devvp); + if (error) { + free(fs->fs_contigdirs, M_UFSMNT, 0); + return (error); + } + wapbl_replay_stop(mp->mnt_wapbl_replay); + fs->fs_clean = FS_WASCLEAN; + goto logok; + } +#endif /* WAPBL */ + if (fs->fs_clean == 0) { #if 0 /* @@ -313,8 +377,18 @@ fs->fs_contigdirs = malloc((u_long)fs->fs_ncg, M_UFSMNT, M_WAITOK|M_ZERO); +#ifdef WAPBL +logok: +#endif /* WAPBL */ ronly = 0; } + +#ifdef WAPBL + error = ffs_wapbl_start(mp); + if (error) + return error; +#endif /* WAPBL */ + if (args.fspec == NULL) { /* * Process export requests. 
@@ -323,8 +397,12 @@ &args.export_info); if (error) goto error_1; - else + else { + error = UFS_WAPBL_BEGIN(mp); + if (error) + goto error_1; goto success; + } } } @@ -417,6 +495,10 @@ if (error) goto error_2; + error = UFS_WAPBL_BEGIN(mp); + if (error) + goto error_2; + /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname @@ -445,6 +527,7 @@ } ffs_sbupdate(ump, MNT_WAIT); } + UFS_WAPBL_END(mp); return (0); error_2: /* error with devvp held */ @@ -693,6 +776,10 @@ bp = NULL; ump = NULL; +#ifdef WAPBL +sbagain: +#endif /* WAPBL */ + /* * Try reading the super-block in each of its possible locations. */ @@ -735,6 +822,43 @@ goto out; } +#ifdef WAPBL + if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) { + error = ffs_wapbl_replay_start(mp, fs, devvp); + if (error && (mp->mnt_flag & MNT_FORCE) == 0) + goto out; + if (!error) { + if (!ronly) { + /* XXX fsmnt may be stale. */ + printf("%s: replaying log to disk\n", + fs->fs_fsmnt); + error = wapbl_replay_write(mp->mnt_wapbl_replay, + devvp); + if (error) + goto out; + wapbl_replay_stop(mp->mnt_wapbl_replay); + fs->fs_clean = FS_WASCLEAN; + } else { + /* XXX fsmnt may be stale */ + printf("%s: replaying log to memory\n", + fs->fs_fsmnt); + } + + /* Force a re-read of the superblock */ + bp->b_flags |= B_INVAL; + brelse(bp); + bp = NULL; + fs = NULL; + goto sbagain; + } + } +#else /* !WAPBL */ + if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) { + error = EPERM; + goto out; + } +#endif /* !WAPBL */ + fs->fs_fmod = 0; fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { @@ -793,6 +917,13 @@ ffs1_compat_read(fs, ump, sbloc); + /* Don't bump fs_clean if we're replaying journal */ + if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN))) + if (ronly == 0) { + fs->fs_clean <<= 1; + fs->fs_fmod = 1; + } + if (fs->fs_clean == 0) fs->fs_flags |= FS_UNCLEAN; fs->fs_ronly = ronly; @@ -879,6 +1010,38 @@ if 
(fs->fs_maxfilesize > maxfilesize) /* XXX */ fs->fs_maxfilesize = maxfilesize; /* XXX */ if (ronly == 0) { +#ifdef WAPBL + KASSERT(fs->fs_ronly == 0); + /* + * verify that we can access the last block in the fs if we're + * mounting read/write. + */ + error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), + fs->fs_fsize, &bp); + if (bp->b_bcount != fs->fs_fsize) + error = EINVAL; + bp->b_flags |= B_INVAL; + if (error) { + free(fs->fs_csp, M_UFSMNT, 0); + free(fs->fs_contigdirs, M_UFSMNT, 0); + goto out; + } + brelse(bp); + bp = NULL; + /* + * ffs_wapbl_start() needs mp->mnt_stat initialised if it + * needs to create a new log file in-filesystem. + */ + ffs_statfs(mp, &mp->mnt_stat, curproc); + + error = ffs_wapbl_start(mp); + if (error) { + free(fs->fs_csp, M_UFSMNT, 0); + free(fs->fs_contigdirs, M_UFSMNT, 0); + goto out; + } +#endif /* WAPBL */ + if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { free(fs->fs_csp, M_UFSMNT, 0); @@ -891,12 +1054,30 @@ fs->fs_flags |= FS_DOSOFTDEP; else fs->fs_flags &= ~FS_DOSOFTDEP; + error = UFS_WAPBL_BEGIN(mp); + if (error) { + free(fs->fs_csp, M_UFSMNT, 0); + free(fs->fs_contigdirs, M_UFSMNT, 0); + goto out; + } error = ffs_sbupdate(ump, MNT_WAIT); - if (error == EROFS) + UFS_WAPBL_END(mp); + if (error == EROFS) { + free(fs->fs_csp, M_UFSMNT, 0); + free(fs->fs_contigdirs, M_UFSMNT, 0); goto out; + } } return (0); out: +#ifdef WAPBL + if (mp->mnt_wapbl_replay) { + wapbl_replay_stop(mp->mnt_wapbl_replay); + wapbl_replay_free(mp->mnt_wapbl_replay); + mp->mnt_wapbl_replay = NULL; + } +#endif /* WAPBL */ + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); @@ -994,7 +1175,10 @@ struct ufsmount *ump; struct fs *fs; int error, flags; - +#ifdef WAPBL + extern int doforce; +#endif /* WAPBL */ + flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; @@ -1007,6 +1191,9 @@ error = ffs_flushfiles(mp, flags, p); if (error != 0) return (error); + error = UFS_WAPBL_BEGIN(mp); + if (error) + goto logfail; if 
(fs->fs_ronly == 0) { fs->fs_clean = (fs->fs_flags & FS_UNCLEAN) ? 0 : 1; @@ -1018,6 +1205,21 @@ } free(fs->fs_contigdirs, M_UFSMNT, 0); } + UFS_WAPBL_END(mp); +logfail: +#ifdef WAPBL + KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl)); + if (mp->mnt_wapbl_replay) { + KASSERT(fs->fs_ronly); + wapbl_replay_stop(mp->mnt_wapbl_replay); + wapbl_replay_free(mp->mnt_wapbl_replay); + mp->mnt_wapbl_replay = NULL; + } + error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE)); + if (error) { + return error; + } +#endif /* WAPBL */ ump->um_devvp->v_specmountpoint = NULL; vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); @@ -1069,6 +1271,17 @@ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); VOP_UNLOCK(ump->um_devvp, 0, p); + +#ifdef WAPBL + if (error) + return error; + if (mp->mnt_wapbl) { + error = wapbl_flush(mp->mnt_wapbl, 1); + if (flags & FORCECLOSE) + error = 0; + } +#endif + return (error); } @@ -1202,9 +1415,26 @@ * Write back modified superblock. 
*/ - if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) - allerror = error; + if (fs->fs_fmod != 0) { + error = UFS_WAPBL_BEGIN(mp); + if (error) + allerror = error; + else { + error = ffs_cgupdate(ump, waitfor); + if (error) + allerror = error; + UFS_WAPBL_END(mp); + } + } +#ifdef WAPBL + if (mp->mnt_wapbl) { + error = wapbl_flush(mp->mnt_wapbl, waitfor != MNT_NOWAIT); + if (error) + allerror = error; + } +#endif /* WAPBL */ + return (allerror); } @@ -1466,6 +1696,42 @@ } int +ffs_cgupdate(struct ufsmount *mp, int waitfor) +{ + struct fs *fs = mp->um_fs; + struct buf *bp; + int blks; + void *space; + int i, size, error = 0, allerror = 0; + + allerror = ffs_sbupdate(mp, waitfor); + blks = howmany(fs->fs_cssize, fs->fs_fsize); + space = fs->fs_csp; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), size, + 0, 0); +#ifdef FFS_EI + if (mp->um_flags & UFS_NEEDSWAP) + ffs_csum_swap((struct csum*)space, + (struct csum*)bp->b_data, size); + else +#endif + memcpy(bp->b_data, space, (u_int)size); + space = (char *)space + size; + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + } + if (!allerror && error) + allerror = error; + return (allerror); +} + +int ffs_init(struct vfsconf *vfsp) { static int done; Index: sys/ufs/ffs/ffs_vnops.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ffs/ffs_vnops.c,v retrieving revision 1.80 diff -u -r1.80 ffs_vnops.c --- sys/ufs/ffs/ffs_vnops.c 14 Mar 2015 03:38:52 -0000 1.80 +++ sys/ufs/ffs/ffs_vnops.c 23 Oct 2015 15:07:07 -0000 @@ -45,7 +45,7 @@ #include <sys/signalvar.h> #include <sys/pool.h> #include <sys/event.h> -#include <sys/specdev.h> +#include <sys/wapbl.h> #include <miscfs/fifofs/fifo.h> @@ -54,6 +54,7 @@ #include <ufs/ufs/dir.h> #include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufsmount.h> 
+#include <ufs/ufs/ufs_wapbl.h> #include <ufs/ffs/fs.h> #include <ufs/ffs/ffs_extern.h> @@ -274,6 +275,13 @@ if (!(vp->v_mount->mnt_flag & MNT_NOATIME) || (ip->i_flag & (IN_CHANGE | IN_UPDATE))) { ip->i_flag |= IN_ACCESS; + if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) { + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + return (error); + error = UFS_UPDATE(ip, MNT_WAIT); + UFS_WAPBL_END(vp->v_mount); + } } return (error); } @@ -343,6 +351,12 @@ osize = DIP(ip, size); flags = ioflag & IO_SYNC ? B_SYNC : 0; + if ((ioflag & IO_JOURNALLOCKED) == 0) { + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + return (error); + } + for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); @@ -410,12 +424,247 @@ } } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { error = UFS_UPDATE(ip, 1); - } + } else + UFS_WAPBL_UPDATE(ip, 0); + if ((ioflag & IO_JOURNALLOCKED) == 0) + UFS_WAPBL_END(vp->v_mount); /* correct the result for writes clamped by vn_fsizechk() */ uio->uio_resid += overrun; return (error); } +#ifdef WAPBL +int ffs_wapbl_fsync_full(void *); +int ffs_wapbl_fsync(void *); +int ffs_wapbl_fsync_vfs(struct vnode *, int); +int ffs_wapbl_fsync_device(void *); + +/* + * Fsync a vnode that is not a regular file on a WAPBL mount: update the + * inode inside a journal transaction if it is flagged as modified, flush + * the log if the vnode has dirty buffers and, for MNT_WAIT, wait for I/O. + */ +int +ffs_wapbl_fsync_full(void *v) +{ + struct vop_fsync_args *ap = v; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct mount *mp = vp->v_mount; + int waitfor = ap->a_waitfor; + int s, error = 0; + + KASSERT(vp->v_type != VREG); + KASSERT(mp->mnt_wapbl != NULL); + +#ifdef DIAGNOSTIC + s = splbio(); + struct buf *bp, *nbp; + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); + bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if ((bp->b_flags & B_LOCKED) == 0) + panic("ffs_wapbl_fsync_full: non-WAPBL buffer %p " + "on vnode %p", bp, vp); + } + splx(s); +#endif + + /* + * Don't bother writing out metadata if the syncer is making the + * request. 
We will let the sync vnode write it out in a single burst + * through a call to VFS_SYNC(). + */ + if (waitfor == MNT_LAZY) + return (0); + + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFIED)) != 0) { + error = UFS_WAPBL_BEGIN(mp); + if (error) + return (error); + error = UFS_UPDATE(ip, waitfor == MNT_WAIT); + UFS_WAPBL_END(mp); + if (error) + return (error); + } + + /* + * Don't flush the log if the vnode being flushed contains no dirty + * buffers that could be in the log. + */ + s = splbio(); + if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { + splx(s); + error = wapbl_flush(mp->mnt_wapbl, 0); + if (error) + return (error); + s = splbio(); + } + + if (waitfor == MNT_WAIT) + error = vwaitforio(vp, 0, "wapblfsy", 0); + + splx(s); + + return (error); +} + +/* + * Fsync a regular file on a WAPBL mount: start writes for its dirty data + * buffers that are not busy and, for MNT_WAIT, wait for them; then, unless + * the request is MNT_LAZY, update the inode in the journal and flush the log. + */ +int +ffs_wapbl_fsync(void *v) +{ + struct vop_fsync_args *ap = v; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct mount *mp = vp->v_mount; + struct buf *bp, *nbp; + int waitfor = ap->a_waitfor; + int s, error; + + KASSERT(vp->v_type == VREG); + KASSERT(mp->mnt_wapbl != NULL); + + /* + * Flush all data blocks. + */ +loop: + s = splbio(); + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); + bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if ((bp->b_flags & B_BUSY) || bp->b_lblkno < 0) + continue; +#ifdef DIAGNOSTIC + if ((bp->b_flags & B_DELWRI) == 0) + panic("ffs_wapbl_fsync: not dirty"); +#endif + bremfree(bp); + buf_acquire(bp); + splx(s); + bawrite(bp); + goto loop; + } + + if (waitfor == MNT_WAIT) { + error = vwaitforio(vp, 0, "wapblsy", 0); + if (error) { + splx(s); + return (error); + } + } + + splx(s); + + /* + * Don't bother writing out metadata if the syncer is making the + * request. We will let the sync vnode write it out in a single burst + * through a call to VFS_SYNC(). 
+ */ + if (waitfor == MNT_LAZY) + return (0); + + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFIED)) != 0) { + error = UFS_WAPBL_BEGIN(mp); + if (error) + return (error); + error = UFS_UPDATE(ip, waitfor == MNT_WAIT); + UFS_WAPBL_END(mp); + if (error) + return (error); + } + + return (wapbl_flush(mp->mnt_wapbl, 0)); +} + +/* + * Synch the block device vnode of a file system mounted with WAPBL. + */ +int +ffs_wapbl_fsync_vfs(struct vnode *vp, int waitfor) +{ + int s, error = 0; + + KASSERT(vp->v_type == VBLK); + KASSERT(vp->v_specmountpoint != NULL); + KASSERT(vp->v_specmountpoint->mnt_wapbl != NULL); + +#ifdef DIAGNOSTIC + s = splbio(); + + struct buf *bp, *nbp; + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); + bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if ((bp->b_flags & B_LOCKED) == 0) + panic("ffs_wapbl_fsync_vfs: non-WAPBL buffer %p " + "on vnode %p", bp, vp); + } + splx(s); +#endif + + /* + * Don't bother writing out metadata if the syncer is making the + * request. We will let the sync vnode write it out in a single burst + * through a call to VFS_SYNC(). + */ + if (waitfor == MNT_LAZY) + return (0); + + /* + * Don't flush the log if the vnode being flushed contains no dirty + * buffers that could be in the log. + */ + s = splbio(); + if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { + struct mount *mp = vp->v_specmountpoint; + splx(s); + error = wapbl_flush(mp->mnt_wapbl, 0); + if (error) + return (error); + s = splbio(); + } + + if (waitfor == MNT_WAIT) + error = vwaitforio(vp, 0, "wapblvfs", 0); + + splx(s); + + return (error); +} + +/* + * Fsync a device vnode that resides on a WAPBL file system. Character + * devices need no work; a block device that is itself mounted with WAPBL + * goes to ffs_wapbl_fsync_vfs(), any other one is handed to vflushbuf(). + */ +int +ffs_wapbl_fsync_device(void *v) +{ + struct vop_fsync_args *ap = v; + struct vnode *vp = ap->a_vp; + + KASSERT(vp->v_mount != NULL && vp->v_mount->mnt_wapbl != NULL); + + if (vp->v_type == VCHR) + return (0); + + /* Are we mounted somewhere with WAPBL? 
*/ + if (vp->v_specmountpoint != NULL && + vp->v_specmountpoint->mnt_wapbl != NULL) + return (ffs_wapbl_fsync_vfs(vp, ap->a_waitfor)); + + vflushbuf(vp, ap->a_waitfor == MNT_WAIT); + + return (0); +} +#endif /* WAPBL */ + /* * Synch an open file. */ @@ -427,6 +676,21 @@ struct buf *bp, *nbp; int s, error, passes, skipmeta; +#ifdef WAPBL + if (vp->v_mount && vp->v_mount->mnt_wapbl) { + if (vn_isdisk(vp, NULL)) + return (ffs_wapbl_fsync_device(ap)); + if (vp->v_type != VREG) + return (ffs_wapbl_fsync_full(ap)); + return (ffs_wapbl_fsync(ap)); + } + + if (vp->v_type == VBLK && + vp->v_specmountpoint != NULL && + vp->v_specmountpoint->mnt_wapbl != NULL) + return (ffs_wapbl_fsync_vfs(vp, ap->a_waitfor)); +#endif /* WAPBL */ + if (vp->v_type == VBLK && vp->v_specmountpoint != NULL && (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) @@ -523,6 +787,7 @@ } } splx(s); + return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); } Index: sys/ufs/ffs/fs.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ffs/fs.h,v retrieving revision 1.41 diff -u -r1.41 fs.h --- sys/ufs/ffs/fs.h 20 Jan 2015 18:08:16 -0000 1.41 +++ sys/ufs/ffs/fs.h 23 Oct 2015 15:07:07 -0000 @@ -270,7 +270,12 @@ int32_t fs_cpc; /* cyl per cycle in postbl */ /* this area is only allocated if fs_ffs1_flags & FS_FLAGS_UPDATED */ int32_t fs_maxbsize; /* maximum blocking factor permitted */ - int64_t fs_spareconf64[17]; /* old rotation block list head */ + uint8_t fs_journal_version; /* journal format version */ + uint8_t fs_journal_location; /* journal location type */ + uint8_t fs_journal_reserved[2];/* reserved for future use */ + uint32_t fs_journal_flags; /* journal flags */ + uint64_t fs_journallocs[4]; /* location info for journal */ + int64_t fs_spareconf64[12]; /* old rotation block list head */ int64_t fs_sblockloc; /* offset of standard super block */ struct csum_total fs_cstotal; /* cylinder summary information */ int64_t fs_time; /* time last written */ @@ 
-329,6 +334,7 @@ */ #define FS_UNCLEAN 0x01 /* filesystem not clean at mount */ #define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */ +#define FS_DOWAPBL 0x04 /* write-ahead physical block logging */ /* * The following flag is used to detect a FFS1 file system that had its flags * moved to the new (FFS2) location for compatibility. @@ -518,6 +524,8 @@ ((loc) & (fs)->fs_qbmask) #define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ ((loc) & (fs)->fs_qfmask) +#define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \ + (((off_t)(frag)) << (fs)->fs_fshift) #define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \ ((off_t)(blk) << (fs)->fs_bshift) #define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ Index: sys/ufs/ufs/inode.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ufs/inode.h,v retrieving revision 1.49 diff -u -r1.49 inode.h --- sys/ufs/ufs/inode.h 14 Jul 2014 08:54:13 -0000 1.49 +++ sys/ufs/ufs/inode.h 23 Oct 2015 15:07:07 -0000 @@ -48,6 +48,13 @@ /* * Per-filesystem inode extensions. */ +struct ffs_inode_ext { + daddr_t *ffs_snapblklist; /* Collect expunged snapshot blocks. */ + /* The next two fields are used by contiguous allocation code only. */ + daddr_t ffs_first_data_blk; /* first data block on disk. */ + daddr_t ffs_first_indir_blk; /* first indirect block on disk. */ +}; + struct ext2fs_inode_ext { u_int32_t ext2fs_last_lblk; /* last logical blk allocated */ u_int32_t ext2fs_last_blk; /* last blk allocated on disk */ @@ -102,10 +109,13 @@ */ union { /* Other extensions could go here... 
*/ + struct ffs_inode_ext ffs; struct ext2fs_inode_ext e2fs; struct dirhash *dirhash; } inode_ext; +#define i_ffs_first_data_blk inode_ext.ffs.ffs_first_data_blk +#define i_ffs_first_indir_blk inode_ext.ffs.ffs_first_indir_blk #define i_e2fs_last_lblk inode_ext.e2fs.ext2fs_last_lblk #define i_e2fs_last_blk inode_ext.e2fs.ext2fs_last_blk #define i_e2fs_uid inode_ext.e2fs.ext2fs_effective_uid Index: sys/ufs/ufs/ufs_extern.h =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ufs/ufs_extern.h,v retrieving revision 1.35 diff -u -r1.35 ufs_extern.h --- sys/ufs/ufs/ufs_extern.h 25 Jan 2014 23:31:13 -0000 1.35 +++ sys/ufs/ufs/ufs_extern.h 23 Oct 2015 15:07:07 -0000 @@ -134,7 +134,7 @@ void ufs_itimes(struct vnode *); int ufs_makeinode(int, struct vnode *, struct vnode **, struct componentname *); - +int ufs_gop_alloc(struct vnode *, off_t, off_t, int, struct ucred *); /* * Soft dependency function prototypes. Index: sys/ufs/ufs/ufs_inode.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ufs/ufs_inode.c,v retrieving revision 1.41 diff -u -r1.41 ufs_inode.c --- sys/ufs/ufs/ufs_inode.c 14 Mar 2015 03:38:53 -0000 1.41 +++ sys/ufs/ufs/ufs_inode.c 23 Oct 2015 15:07:07 -0000 @@ -43,16 +43,20 @@ #include <sys/mount.h> #include <sys/malloc.h> #include <sys/namei.h> +#include <sys/wapbl.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> +#include <ufs/ufs/ufs_wapbl.h> #ifdef UFS_DIRHASH #include <ufs/ufs/dir.h> #include <ufs/ufs/dirhash.h> #endif +#include <ufs/ffs/fs.h> + /* * Last reference to an inode. If necessary, write or delete it. 
*/ @@ -62,9 +66,10 @@ struct vop_inactive_args *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); + struct fs *fs = ip->i_fs; struct proc *p = ap->a_p; mode_t mode; - int error = 0; + int error = 0, logged = 0; #ifdef DIAGNOSTIC extern int prtactive; @@ -72,6 +77,8 @@ vprint("ufs_inactive: pushing active", vp); #endif + UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount); + /* * Ignore inodes related to stale file handles. */ @@ -79,9 +86,42 @@ goto out; if (DIP(ip, nlink) <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + goto out; + logged = 1; if (getinoquota(ip) == 0) (void)ufs_quota_free_inode(ip, NOCRED); + if (DIP(ip, size) != 0) { + /* + * When journaling, only truncate one indirect block + * at a time + */ + if (vp->v_mount->mnt_wapbl) { + uint64_t incr = MNINDIR(ip->i_ump) << + fs->fs_bshift; + uint64_t base = NDADDR << fs->fs_bshift; + while (!error && DIP(ip, size) > base + incr) { + /* + * round down to next full indirect + * block boundary. 
+ */ + uint64_t nsize = base + + ((DIP(ip, size) - base - 1) & + ~(incr - 1)); + error = UFS_TRUNCATE(ip, nsize, 0, + NOCRED); + if (error) + break; + UFS_WAPBL_END(vp->v_mount); + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + goto out; + } + } + } + error = UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED); DIP_ASSIGN(ip, rdev, 0); @@ -108,8 +148,16 @@ } if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { + if (!logged++) { + int err; + err = UFS_WAPBL_BEGIN(vp->v_mount); + if (err) + goto out; + } UFS_UPDATE(ip, 0); } + if (logged) + UFS_WAPBL_END(vp->v_mount); out: VOP_UNLOCK(vp, 0, p); @@ -143,8 +191,12 @@ * Stop deferring timestamp writes */ if (ip->i_flag & IN_LAZYMOD) { + int err = UFS_WAPBL_BEGIN(vp->v_mount); + if (err) + return (err); ip->i_flag |= IN_MODIFIED; UFS_UPDATE(ip, 0); + UFS_WAPBL_END(vp->v_mount); } /* Index: sys/ufs/ufs/ufs_lookup.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ufs/ufs_lookup.c,v retrieving revision 1.47 diff -u -r1.47 ufs_lookup.c --- sys/ufs/ufs/ufs_lookup.c 14 Mar 2015 03:38:53 -0000 1.47 +++ sys/ufs/ufs/ufs_lookup.c 23 Oct 2015 15:07:07 -0000 @@ -46,6 +46,7 @@ #include <sys/mount.h> #include <sys/proc.h> #include <sys/vnode.h> +#include <sys/wapbl.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> @@ -55,6 +56,7 @@ #endif #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> +#include <ufs/ufs/ufs_wapbl.h> extern struct nchstats nchstats; @@ -445,6 +447,7 @@ ufs_dirbad(dp, dp->i_offset, "i_ffs_size too small"); DIP_ASSIGN(dp, size, dp->i_offset + DIRSIZ(FSFMT(vdp), ep)); dp->i_flag |= IN_CHANGE | IN_UPDATE; + UFS_WAPBL_UPDATE(dp, MNT_WAIT); } brelse(bp); @@ -704,6 +707,8 @@ int error, ret, blkoff, loc, spacefree, flags; char *dirbuf; + UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); + error = 0; cr = cnp->cn_cred; p = cnp->cn_proc; @@ -805,8 +810,11 @@ * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 
*/ - if (dp->i_offset + dp->i_count > DIP(dp, size)) + if (dp->i_offset + dp->i_count > DIP(dp, size)) { DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + UFS_WAPBL_UPDATE(dp, MNT_WAIT); + } /* * Get the block containing the space for the new directory entry. */ @@ -925,6 +933,7 @@ if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); } + UFS_WAPBL_UPDATE(dp, MNT_WAIT); return (error); } @@ -948,6 +957,8 @@ struct buf *bp; int error; + UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); + dp = VTOI(dvp); if ((error = UFS_BUFATOFF(dp, @@ -997,6 +1008,7 @@ ip->i_effnlink--; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(ip, 0); } if (DOINGASYNC(dvp) && dp->i_count != 0) { bdwrite(bp); @@ -1005,6 +1017,7 @@ error = bwrite(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; + UFS_WAPBL_UPDATE(dp, 0); return (error); } @@ -1036,6 +1049,7 @@ } else { DIP_ADD(oip, nlink, -1); oip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(oip, MNT_WAIT); if (DOINGASYNC(vdp)) { bdwrite(bp); error = 0; @@ -1044,6 +1058,7 @@ } } dp->i_flag |= IN_CHANGE | IN_UPDATE; + UFS_WAPBL_UPDATE(dp, MNT_WAIT); return (error); } Index: sys/ufs/ufs/ufs_vnops.c =================================================================== RCS file: /Volumes/CSP/cvs/src/sys/ufs/ufs/ufs_vnops.c,v retrieving revision 1.122 diff -u -r1.122 ufs_vnops.c --- sys/ufs/ufs/ufs_vnops.c 23 Sep 2015 15:37:26 -0000 1.122 +++ sys/ufs/ufs/ufs_vnops.c 23 Oct 2015 15:07:07 -0000 @@ -54,8 +54,8 @@ #include <sys/lockf.h> #include <sys/event.h> #include <sys/poll.h> -#include <sys/specdev.h> #include <sys/unistd.h> +#include <sys/wapbl.h> #include <miscfs/fifofs/fifo.h> @@ -64,6 +64,8 @@ #include <ufs/ufs/dir.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> +#include <ufs/ufs/ufs_wapbl.h> +#include <ufs/ffs/fs.h> #ifdef UFS_DIRHASH #include <ufs/ufs/dirhash.h> #endif @@ -165,11 +167,13 @@ struct vop_create_args *ap = v; int error; + /* UFS_WAPBL_BEGIN1(dvp) performed by 
successful ufs_makeinode() */ error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp); if (error) return (error); + UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp); VN_KNOTE(ap->a_dvp, NOTE_WRITE); return (0); } @@ -186,6 +190,10 @@ struct inode *ip; int error; + /* + * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful + * ufs_makeinode + */ if ((error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp)) != 0) @@ -200,6 +208,8 @@ */ DIP_ASSIGN(ip, rdev, vap->va_rdev); } + UFS_WAPBL_UPDATE(VTOI(*vpp), 0); + UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp); /* * Remove inode so that it will be reloaded by VFS_VGET and * checked to see if it is an alias of an existing entry in @@ -348,6 +358,7 @@ struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); + struct fs *fs = ip->i_fs; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; int error; @@ -369,19 +380,28 @@ if (cred->cr_uid != DIP(ip, uid) && (error = suser_ucred(cred))) return (error); + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + return (error); if (cred->cr_uid == 0) { if ((DIP(ip, flags) & (SF_IMMUTABLE | SF_APPEND)) && - securelevel > 0) + securelevel > 0) { + UFS_WAPBL_END(vp->v_mount); return (EPERM); + } DIP_ASSIGN(ip, flags, vap->va_flags); } else { if (DIP(ip, flags) & (SF_IMMUTABLE | SF_APPEND) || - (vap->va_flags & UF_SETTABLE) != vap->va_flags) + (vap->va_flags & UF_SETTABLE) != vap->va_flags) { + UFS_WAPBL_END(vp->v_mount); return (EPERM); + } DIP_AND(ip, flags, SF_SETTABLE); DIP_OR(ip, flags, vap->va_flags & UF_SETTABLE); } ip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(ip, 0); + UFS_WAPBL_END(vp->v_mount); if (vap->va_flags & (IMMUTABLE | APPEND)) return (0); } @@ -393,7 +413,11 @@ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + return 
(error); error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p); + UFS_WAPBL_END(vp->v_mount); if (error) return (error); } @@ -415,8 +439,36 @@ default: break; } - if ((error = UFS_TRUNCATE(ip, vap->va_size, 0, cred)) != 0) - return (error); + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + return (error); + /* + * When journaling, only truncate one indirect block at a time. + */ + if (vp->v_mount->mnt_wapbl) { + uint64_t incr = MNINDIR(ip->i_ump) << fs->fs_bshift; + uint64_t base = NDADDR << fs->fs_bshift; + while (!error && DIP(ip, size) > base + incr && + DIP(ip, size) > vap->va_size + incr) { + /* + * round down to next full indirect + * block boundary. + */ + uint64_t nsize = base + + ((DIP(ip, size) - base - 1) & + ~(incr - 1)); + error = UFS_TRUNCATE(ip, nsize, 0, cred); + if (error == 0) { + UFS_WAPBL_END(vp->v_mount); + error = UFS_WAPBL_BEGIN(vp->v_mount); + } + } + } + if (!error) + error = UFS_TRUNCATE(ip, vap->va_size, 0, cred); + UFS_WAPBL_END(vp->v_mount); + if (error) + return (error); if (vap->va_size < oldsize) hint |= NOTE_TRUNCATE; } @@ -430,6 +482,9 @@ ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(vp, VWRITE, cred, p)))) return (error); + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + return (error); if (vap->va_mtime.tv_nsec != VNOVAL) ip->i_flag |= IN_CHANGE | IN_UPDATE; else if (vap->va_vaflags & VA_UTIMES_CHANGE) @@ -449,6 +504,7 @@ DIP_ASSIGN(ip, atimensec, vap->va_atime.tv_nsec); } error = UFS_UPDATE(ip, 0); + UFS_WAPBL_END(vp->v_mount); if (error) return (error); } @@ -456,7 +512,11 @@ if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) + return (error); error = ufs_chmod(vp, (int)vap->va_mode, cred, p); + UFS_WAPBL_END(vp->v_mount); } VN_KNOTE(vp, hint); return (error); @@ -472,6 +532,8 @@ struct inode *ip = VTOI(vp); int error; + UFS_WAPBL_JLOCK_ASSERT(vp->v_mount); + if (cred->cr_uid != DIP(ip, uid) && (error 
= suser_ucred(cred))) return (error); @@ -484,6 +546,7 @@ DIP_AND(ip, mode, ~ALLPERMS); DIP_OR(ip, mode, mode & ALLPERMS); ip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(ip, 0); if ((vp->v_flag & VTEXT) && (DIP(ip, mode) & S_ISTXT) == 0) (void) uvm_vnp_uncache(vp); return (0); @@ -553,8 +616,10 @@ if (getinoquota(ip)) panic("chown: lost quota"); - if (ouid != uid || ogid != gid) + if (ouid != uid || ogid != gid) { ip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(ip, 0); + } if (ouid != uid && cred->cr_uid != 0) DIP_AND(ip, mode, ~ISUID); if (ogid != gid && cred->cr_uid != 0) @@ -612,12 +677,16 @@ if (vp->v_type == VDIR || (DIP(ip, flags) & (IMMUTABLE | APPEND)) || (DIP(VTOI(dvp), flags) & APPEND)) { error = EPERM; - goto out; + } else { + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error == 0) { + error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); + VN_KNOTE(vp, NOTE_DELETE); + VN_KNOTE(dvp, NOTE_WRITE); + UFS_WAPBL_END(vp->v_mount); + } } - error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); - VN_KNOTE(vp, NOTE_DELETE); - VN_KNOTE(dvp, NOTE_WRITE); - out: + if (dvp == vp) vrele(vp); else @@ -670,6 +739,11 @@ error = EPERM; goto out1; } + error = UFS_WAPBL_BEGIN(vp->v_mount); + if (error) { + VOP_ABORTOP(dvp, cnp); + goto out1; + } ip->i_effnlink++; DIP_ADD(ip, nlink, 1); ip->i_flag |= IN_CHANGE; @@ -683,9 +757,11 @@ ip->i_effnlink--; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(ip, MNT_WAIT); if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip, 0); } + UFS_WAPBL_END(vp->v_mount); pool_put(&namei_pool, cnp->cn_pnbuf); VN_KNOTE(vp, NOTE_LINK); VN_KNOTE(dvp, NOTE_WRITE); @@ -729,6 +805,7 @@ struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; + struct mount *mp = fdvp->v_mount; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; @@ -742,6 +819,9 @@ (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif + + KASSERT(mp != NULL); + 
/* * Check for cross-device rename. */ @@ -862,6 +942,12 @@ if (tvp) xp = VTOI(tvp); + error = UFS_WAPBL_BEGIN(mp); + if (error) { + VOP_UNLOCK(fvp, 0, p); + goto bad; + } + /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before @@ -1037,6 +1123,7 @@ if (!newparent) { DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(dp, 0); } DIP_ADD(xp, nlink, -1); @@ -1061,6 +1148,7 @@ panic("ufs_rename: lost from startdir"); if ((error = vfs_relookup(fdvp, &fvp, fcnp)) != 0) { vrele(ap->a_fvp); + UFS_WAPBL_END(mp); return (error); } vrele(fdvp); @@ -1071,6 +1159,7 @@ if (doingdirectory) panic("ufs_rename: lost dir entry"); vrele(ap->a_fvp); + UFS_WAPBL_END(mp); return (0); } @@ -1110,6 +1199,7 @@ if (xp) vput(fvp); vrele(ap->a_fvp); + UFS_WAPBL_END(mp); return (error); bad: @@ -1125,11 +1215,13 @@ DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; + UFS_WAPBL_UPDATE(ip, 0); if (DOINGSOFTDEP(fvp)) softdep_change_linkcnt(ip, 0); vput(fvp); } else vrele(fvp); + UFS_WAPBL_END(mp); return (error); } @@ -1171,6 +1263,14 @@ ip = VTOI(tvp); + error = UFS_WAPBL_BEGIN(dvp->v_mount); + if (error) { + UFS_INODE_FREE(ip, ip->i_number, dmode); + vput(tvp); + vput(dvp); + return (error); + } + DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid); DIP_ASSIGN(ip, gid, DIP(dp, gid)); @@ -1178,6 +1278,7 @@ (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) { pool_put(&namei_pool, cnp->cn_pnbuf); UFS_INODE_FREE(ip, ip->i_number, dmode); + UFS_WAPBL_END(dvp->v_mount); vput(tvp); vput(dvp); return (error); @@ -1261,10 +1362,12 @@ if (error == 0) { VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); *ap->a_vpp = tvp; + UFS_WAPBL_END(dvp->v_mount); } else { dp->i_effnlink--; DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(dp, MNT_WAIT); if (DOINGSOFTDEP(dvp)) softdep_change_linkcnt(dp, 0); /* @@ -1274,6 +1377,8 @@ ip->i_effnlink = 0; DIP_ASSIGN(ip, nlink, 0); ip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(ip, MNT_WAIT); + 
UFS_WAPBL_END(dvp->v_mount); if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(ip, 0); vput(tvp); @@ -1332,21 +1437,26 @@ error = EPERM; goto out; } + error = UFS_WAPBL_BEGIN(dvp->v_mount); + if (error) + goto out; + /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ - dp->i_effnlink--; - ip->i_effnlink--; if (DOINGSOFTDEP(vp)) { + dp->i_effnlink--; + ip->i_effnlink--; softdep_change_linkcnt(dp, 0); softdep_change_linkcnt(ip, 0); } if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0) { - dp->i_effnlink++; - ip->i_effnlink++; + UFS_WAPBL_END(dvp->v_mount); if (DOINGSOFTDEP(vp)) { + dp->i_effnlink++; + ip->i_effnlink++; softdep_change_linkcnt(dp, 0); softdep_change_linkcnt(ip, 0); } @@ -1365,14 +1475,22 @@ if (!DOINGSOFTDEP(vp)) { int ioflag; + dp->i_effnlink--; DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(dp, MNT_WAIT); + ip->i_effnlink--; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC; error = UFS_TRUNCATE(ip, (off_t)0, ioflag, cnp->cn_cred); } cache_purge(vp); + /* + * Unlock the log while we still have reference to the unlinked + * directory vp so that it will not get locked for recycling + */ + UFS_WAPBL_END(dvp->v_mount); #ifdef UFS_DIRHASH /* Kill any active hash; i_effnlink == 0, so it will not come back. 
*/ if (ip->i_dirhash != NULL) @@ -1397,6 +1515,7 @@ struct inode *ip; int len, error; + /* UFS_WAPBL_BEGIN1(dvp) performed by successful ufs_makeinode() */ error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp); if (error) @@ -1409,10 +1528,12 @@ memcpy(SHORTLINK(ip), ap->a_target, len); DIP_ASSIGN(ip, size, len); ip->i_flag |= IN_CHANGE | IN_UPDATE; + UFS_WAPBL_UPDATE(ip, 0); } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, NULL, - curproc); + UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED, + ap->a_cnp->cn_cred, NULL, curproc); + UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp); vput(vp); return (error); } @@ -1593,6 +1714,7 @@ struct buf *bp = ap->a_bp; struct vnode *vp = bp->b_vp; struct inode *ip; + struct mount *mp; int error; int s; @@ -1622,7 +1744,28 @@ vp = ip->i_devvp; bp->b_dev = vp->v_rdev; (vp->v_op->vop_strategy)(ap); - return (0); + + if ((bp->b_flags & B_READ) == 0) + return (0); + + mp = wapbl_vptomp(vp); + if (mp == NULL || mp->mnt_wapbl_replay == NULL || + !WAPBL_REPLAY_ISOPEN(mp) || + !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount)) + return (0); + + error = biowait(bp); + if (error) + return (error); + + error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount); + if (error) { + s = splbio(); + SET(bp->b_flags, B_INVAL); + splx(s); + } + + return (error); } /* @@ -1897,6 +2040,8 @@ struct vnode *tvp; int error; + UFS_WAPBL_JUNLOCK_ASSERT(dvp->v_mount); + pdir = VTOI(dvp); #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) @@ -1917,10 +2062,23 @@ DIP_ASSIGN(ip, gid, DIP(pdir, gid)); DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid); + error = UFS_WAPBL_BEGIN1(dvp->v_mount, dvp); + if (error) { + /* + * Note, we can't UFS_INODE_FREE() here like we should because + * we can't write to the disk. Instead, we leave the vnode + * dangling from the journal. 
+ */ + vput(tvp); + vput(dvp); + return (error); + } + if ((error = getinoquota(ip)) || (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) { pool_put(&namei_pool, cnp->cn_pnbuf); UFS_INODE_FREE(ip, ip->i_number, mode); + UFS_WAPBL_END1(dvp->v_mount, dvp); vput(tvp); vput(dvp); return (error); @@ -1964,14 +2122,52 @@ ip->i_effnlink = 0; DIP_ASSIGN(ip, nlink, 0); ip->i_flag |= IN_CHANGE; + UFS_WAPBL_UPDATE(VTOI(tvp), 0); if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(ip, 0); tvp->v_type = VNON; + UFS_WAPBL_END1(dvp->v_mount, dvp); vput(tvp); return (error); } +/* + * Allocate len bytes at offset off, one file system block at a time. + */ +int +ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, + struct ucred *cred) +{ + struct inode *ip = VTOI(vp); + int chunk = 1 << ip->i_fs->fs_bshift; + int slack = off & (chunk - 1); + int error = 0; + + /* Pull the start back to a block boundary; grow len to match. */ + off -= slack; + len += slack; + + for (; len > 0; off += chunk, len -= chunk) { + chunk = MIN(chunk, len); + + error = UFS_BUF_ALLOC(ip, off, chunk, cred, flags, NULL); + if (error) + break; + + /* + * UFS_BUF_ALLOC() requires that EOF be up-to-date before + * each call, so grow the file size right away. + */ + if (DIP(ip, size) < off + chunk) { + DIP_ASSIGN(ip, size, off + chunk); + } + } + + UFS_WAPBL_UPDATE(ip, 0); + return error; +} + struct filterops ufsread_filtops = { 1, NULL, filt_ufsdetach, filt_ufsread }; struct filterops ufswrite_filtops =