After recent reboot related improvements, I have some familiarity with tricking the vnode layer into a stable state.
This should settle filesystems "clean" onto disk. If one does a suspend and then runs out of battery.. or experiences a crash or out of battery during a hibernate, the filesystems will now be in a clean state and not require an fsck. After the screen blanks, there may be 1-2 seconds more IO than before. Those weren't hitting the platter, but remained in the vfs softlayer through to resume. The new situation is much better. I have wanted to solve this problem for years.. There may be problems with softraid again, we'll need to see. I haven't tested softdep but the correct syncronization functions appear to be called. USB sticks get detached during the suspend phase, and are as good as before. This isn't quite finished, and I have a few more tricks up my sleeve, and the "vfs_stalling" global will probably become a parameter for VFS_SYNC()... I'd appreciate reports about how well it delivers and if any new problems show up. Index: kern/vfs_subr.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_subr.c,v retrieving revision 1.265 diff -u -p -u -r1.265 vfs_subr.c --- kern/vfs_subr.c 14 Dec 2017 20:23:15 -0000 1.265 +++ kern/vfs_subr.c 16 Dec 2017 23:50:21 -0000 @@ -1583,6 +1583,42 @@ vaccess(enum vtype type, mode_t file_mod return (file_mode & mask) == mask ? 0 : EACCES; } +int vfs_stalling; + +int +vfs_stallmp(struct mount *mp, struct proc *p, int stall) +{ + int error; + + if (stall) { + error = vfs_busy(mp, VB_WRITE|VB_WAIT); + if (error) { + printf("%s: busy\n", mp->mnt_stat.f_mntonname); + return (error); + } + uvm_vnp_sync(mp); + vfs_stalling = stall; + error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); + if (error) { + printf("%s: failed to sync\n", mp->mnt_stat.f_mntonname); + vfs_unbusy(mp); + return (error); + } + } else { + vfs_unbusy(mp); + } + return (0); +} + +void +vfs_stall(struct proc *p, int stall) +{ + struct mount *mp, *nmp; + + TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) + (void) vfs_stallmp(mp, p, stall); +} + int vfs_readonly(struct mount *mp, struct proc *p) { @@ -1627,10 +1663,8 @@ vfs_rofs(struct proc *p) { struct mount *mp, *nmp; - TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) { - /* XXX Here is a race, the next pointer is not locked. */ + TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) (void) vfs_readonly(mp, p); - } } /* Index: ufs/ffs/ffs_vfsops.c =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v retrieving revision 1.170 diff -u -p -u -r1.170 ffs_vfsops.c --- ufs/ffs/ffs_vfsops.c 14 Dec 2017 20:20:38 -0000 1.170 +++ ufs/ffs/ffs_vfsops.c 17 Dec 2017 03:03:06 -0000 @@ -1145,7 +1145,8 @@ struct ffs_sync_args { }; int -ffs_sync_vnode(struct vnode *vp, void *arg) { +ffs_sync_vnode(struct vnode *vp, void *arg) +{ struct ffs_sync_args *fsa = arg; struct inode *ip; int error; @@ -1193,8 +1194,9 @@ ffs_sync(struct mount *mp, int waitfor, { struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; - int error, allerror = 0, count; + int error, allerror = 0, count, clean, fmod; struct ffs_sync_args fsa; + extern int vfs_stalling; fs = ump->um_fs; /* @@ -1240,12 +1242,23 @@ ffs_sync(struct mount *mp, int waitfor, VOP_UNLOCK(ump->um_devvp, p); } qsync(mp); + /* * Write back modified superblock. */ - + clean = fs->fs_clean; + fmod = fs->fs_fmod; + if (vfs_stalling && (fs->fs_ronly == 0)) { + fs->fs_fmod = 1; + fs->fs_clean = (fs->fs_flags & FS_UNCLEAN) ? 0 : 1; +// printf("%s clean %d fmod %d\n", +// mp->mnt_stat.f_mntonname, fs->fs_clean, +// fs->fs_fmod); + } if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) allerror = error; + fs->fs_clean = clean; + fs->fs_fmod = fmod; return (allerror); } Index: dev/acpi/acpi.c =================================================================== RCS file: /cvs/src/sys/dev/acpi/acpi.c,v retrieving revision 1.335 diff -u -p -u -r1.335 acpi.c --- dev/acpi/acpi.c 29 Nov 2017 22:51:01 -0000 1.335 +++ dev/acpi/acpi.c 17 Dec 2017 00:05:29 -0000 @@ -2439,6 +2439,8 @@ acpi_sleep_state(struct acpi_softc *sc, { extern int perflevel; extern int lid_action; + extern void vfs_stall(struct proc *, int); + extern int sys_sync(struct proc *, void *, register_t *); int error = ENXIO; size_t rndbuflen = 0; char *rndbuf = NULL; @@ -2482,6 +2484,7 @@ acpi_sleep_state(struct acpi_softc *sc, #ifdef HIBERNATE if (sleepmode == ACPI_SLEEP_HIBERNATE) { uvmpd_hibernate(); + // XXX and then vfs_stall + bufq_quiesce?? Bogus! hibernate_suspend_bufcache(); if (hibernate_alloc()) { printf("%s: failed to allocate hibernate memory\n", @@ -2495,6 +2498,7 @@ acpi_sleep_state(struct acpi_softc *sc, if (config_suspend_all(DVACT_QUIESCE)) goto fail_quiesce; + vfs_stall(curproc, 1); bufq_quiesce(); #ifdef MULTIPROCESSOR @@ -2507,6 +2511,8 @@ acpi_sleep_state(struct acpi_softc *sc, disable_intr(); /* PSL_I for resume; PIC/APIC broken until repair */ cold = 2; /* Force other code to delay() instead of tsleep() */ + vfs_stall(curproc, 0); + if (config_suspend_all(DVACT_SUSPEND) != 0) goto fail_suspend; acpi_sleep_clocks(sc, state); @@ -2588,6 +2594,8 @@ fail_alloc: wsdisplay_resume(); rw_enter_write(&sc->sc_lck); #endif /* NWSDISPLAY > 0 */ + + sys_sync(curproc, NULL, NULL); /* Restore hw.setperf */ if (cpu_setperf != NULL)