The implementation is straightforward, since we already have per-container writeback. Sync just gets the user beancounter and launches writeback work for it.
Note that per-container sync[fs] filtering applies only to dirty data: inodes with dirty metadata are still written out regardless of which container dirtied them. This logic was inherited from PCS6 (see diff-ubc-dont-skip-dirty-metadata-on-filtered-sync patch). https://jira.sw.ru/browse/PSBM-39583 Signed-off-by: Andrey Ryabinin <[email protected]> --- fs/buffer.c | 2 +- fs/fs-writeback.c | 82 ++++++++++++++++++++++++++++++++--------------- fs/sync.c | 17 ++++++---- include/linux/writeback.h | 6 +++- mm/vmscan.c | 2 +- 5 files changed, 74 insertions(+), 35 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 2b709d4..d1eb45e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -279,7 +279,7 @@ static void free_more_memory(void) struct zone *zone; int nid; - wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM); + wakeup_flusher_threads(1024, NULL, WB_REASON_FREE_MORE_MEM); yield(); for_each_online_node(nid) { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d48530f9..0445155 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -132,7 +132,8 @@ out_unlock: static void __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, - bool range_cyclic, enum wb_reason reason) + struct user_beancounter *ub, bool range_cyclic, + enum wb_reason reason) { struct wb_writeback_work *work; @@ -151,6 +152,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, work->nr_pages = nr_pages; work->range_cyclic = range_cyclic; work->reason = reason; + work->ub = ub; bdi_queue_work(bdi, work); } @@ -170,7 +172,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason) { - __bdi_start_writeback(bdi, nr_pages, true, reason); + __bdi_start_writeback(bdi, nr_pages, NULL, true, reason); } /** @@ -727,6 +729,7 @@ static long writeback_sb_inodes(struct super_block *sb, continue; } if ((work->ub || work->filter_ub) && + (inode->i_state & I_DIRTY) == I_DIRTY_PAGES) && ub_should_skip_writeback(work->ub, inode)) { 
spin_unlock(&inode->i_lock); redirty_tail(inode, wb); @@ -1148,7 +1151,8 @@ void bdi_writeback_workfn(struct work_struct *work) * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back * the whole world. */ -void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) +void wakeup_flusher_threads(long nr_pages, struct user_beancounter *ub, + enum wb_reason reason) { struct backing_dev_info *bdi; @@ -1159,7 +1163,7 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - __bdi_start_writeback(bdi, nr_pages, false, reason); + __bdi_start_writeback(bdi, nr_pages, ub, false, reason); } rcu_read_unlock(); } @@ -1374,7 +1378,7 @@ out_unlock_inode: } EXPORT_SYMBOL(__mark_inode_dirty); -static void wait_sb_inodes(struct super_block *sb) +static void wait_sb_inodes(struct super_block *sb, struct user_beancounter *ub) { struct inode *inode, *old_inode = NULL; @@ -1402,6 +1406,12 @@ static void wait_sb_inodes(struct super_block *sb) spin_unlock(&inode->i_lock); continue; } + if (ub && (mapping->dirtied_ub != ub) && + ((inode->i_state & I_DIRTY) == I_DIRTY_PAGES)) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&inode_sb_list_lock); @@ -1427,17 +1437,8 @@ static void wait_sb_inodes(struct super_block *sb) iput(old_inode); } -/** - * writeback_inodes_sb_nr - writeback dirty inodes from given super_block - * @sb: the superblock - * @nr: the number of pages to write - * @reason: reason why some writeback work initiated - * - * Start writeback on some inodes on this super_block. No guarantees are made - * on how many (if any) will be written, and this function does not wait - * for IO completion of submitted IO. 
- */ -void writeback_inodes_sb_nr(struct super_block *sb, +static void writeback_inodes_sb_ub_nr(struct super_block *sb, + struct user_beancounter *ub, unsigned long nr, enum wb_reason reason) { @@ -1449,6 +1450,7 @@ void writeback_inodes_sb_nr(struct super_block *sb, .done = &done, .nr_pages = nr, .reason = reason, + .ub = ub, }; if (sb->s_bdi == &noop_backing_dev_info) @@ -1457,8 +1459,32 @@ void writeback_inodes_sb_nr(struct super_block *sb, bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); } + +/** + * writeback_inodes_sb_nr - writeback dirty inodes from given super_block + * @sb: the superblock + * @nr: the number of pages to write + * @reason: reason why some writeback work initiated + * + * Start writeback on some inodes on this super_block. No guarantees are made + * on how many (if any) will be written, and this function does not wait + * for IO completion of submitted IO. + */ +void writeback_inodes_sb_nr(struct super_block *sb, + unsigned long nr, + enum wb_reason reason) +{ + + writeback_inodes_sb_ub_nr(sb, NULL, nr, reason); +} EXPORT_SYMBOL(writeback_inodes_sb_nr); +void writeback_inodes_sb_ub(struct super_block *sb, struct user_beancounter *ub, + enum wb_reason reason) +{ + return writeback_inodes_sb_ub_nr(sb, ub, get_nr_dirty_pages(), reason); +} + /** * writeback_inodes_sb - writeback dirty inodes from given super_block * @sb: the superblock @@ -1513,14 +1539,7 @@ int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) } EXPORT_SYMBOL(try_to_writeback_inodes_sb); -/** - * sync_inodes_sb - sync sb inode pages - * @sb: the superblock - * - * This function writes and waits on any dirty inode belonging to this - * super_block. 
- */ -void sync_inodes_sb(struct super_block *sb) +void sync_inodes_sb_ub(struct super_block *sb, struct user_beancounter *ub) { DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { @@ -1531,6 +1550,7 @@ void sync_inodes_sb(struct super_block *sb) .done = &done, .reason = WB_REASON_SYNC, .for_sync = 1, + .ub = ub, }; /* Nothing to do? */ @@ -1541,7 +1561,19 @@ void sync_inodes_sb(struct super_block *sb) bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); - wait_sb_inodes(sb); + wait_sb_inodes(sb, ub); +} + +/** + * sync_inodes_sb - sync sb inode pages + * @sb: the superblock + * + * This function writes and waits on any dirty inode belonging to this + * super_block. + */ +void sync_inodes_sb(struct super_block *sb) +{ + sync_inodes_sb_ub(sb, NULL); } EXPORT_SYMBOL(sync_inodes_sb); diff --git a/fs/sync.c b/fs/sync.c index ba033c1..bef5163 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -35,9 +35,9 @@ static int __sync_filesystem(struct super_block *sb, struct user_beancounter *ub, int wait) { if (wait) - sync_inodes_sb(sb); + sync_inodes_sb_ub(sb, ub); else - writeback_inodes_sb(sb, WB_REASON_SYNC); + writeback_inodes_sb_ub(sb, ub, WB_REASON_SYNC); if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); @@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - sync_inodes_sb(sb); + sync_inodes_sb_ub(sb, (struct user_beancounter *)arg); } static void sync_fs_one_sb(struct super_block *sb, void *arg) @@ -133,7 +133,7 @@ int ve_fsync_behavior(void) SYSCALL_DEFINE0(sync) { struct ve_struct *ve = get_exec_env(); - struct user_beancounter *ub; + struct user_beancounter *ub, *sync_ub = NULL; int nowait = 0, wait = 1; ub = get_exec_ub(); @@ -154,15 +154,18 @@ SYSCALL_DEFINE0(sync) fsb = __ve_fsync_behavior(ve); if (fsb == FSYNC_NEVER) goto skip; + + if (fsb == FSYNC_FILTERED) + sync_ub = get_io_ub(); } - wakeup_flusher_threads(0, WB_REASON_SYNC); - 
iterate_supers(sync_inodes_one_sb, NULL); + wakeup_flusher_threads(0, ub, WB_REASON_SYNC); + iterate_supers(sync_inodes_one_sb, sync_ub); iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_bdevs(fdatawait_one_bdev, NULL); - if (unlikely(laptop_mode)) + if (unlikely(laptop_mode) && !sync_ub) laptop_sync_completion(); skip: ub_percpu_inc(ub, sync_done); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a193a7e..2337227 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -93,13 +93,17 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *, enum wb_reason reason); +void writeback_inodes_sb_ub(struct super_block *, struct user_beancounter *, + enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); void sync_inodes_sb(struct super_block *); -void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); +void sync_inodes_sb_ub(struct super_block *, struct user_beancounter *ub); +void wakeup_flusher_threads(long nr_pages, struct user_beancounter *ub, + enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 0406c11..9611251 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2619,7 +2619,7 @@ retry: writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2; if (total_scanned > writeback_threshold) { wakeup_flusher_threads(laptop_mode ? 
0 : total_scanned, - WB_REASON_TRY_TO_FREE_PAGES); + NULL, WB_REASON_TRY_TO_FREE_PAGES); sc->may_writepage = 1; } -- 2.4.10 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
