The implementation is straightforward, since we already have per-container
writeback. Sync just gets the user beancounter and launches writeback work for it.

Note that per-container sync[fs] filtering applies only to dirty data. Dirty
metadata will be written out regardless of which container dirtied it. This
logic was inherited from PCS6
(see the diff-ubc-dont-skip-dirty-metadata-on-filtered-sync patch).

https://jira.sw.ru/browse/PSBM-39583

Signed-off-by: Andrey Ryabinin <[email protected]>
---
 fs/buffer.c               |  2 +-
 fs/fs-writeback.c         | 82 ++++++++++++++++++++++++++++++++---------------
 fs/sync.c                 | 17 ++++++----
 include/linux/writeback.h |  6 +++-
 mm/vmscan.c               |  2 +-
 5 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 2b709d4..d1eb45e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -279,7 +279,7 @@ static void free_more_memory(void)
        struct zone *zone;
        int nid;
 
-       wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
+       wakeup_flusher_threads(1024, NULL, WB_REASON_FREE_MORE_MEM);
        yield();
 
        for_each_online_node(nid) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d48530f9..0445155 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -132,7 +132,8 @@ out_unlock:
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-                     bool range_cyclic, enum wb_reason reason)
+                       struct user_beancounter *ub, bool range_cyclic,
+                       enum wb_reason reason)
 {
        struct wb_writeback_work *work;
 
@@ -151,6 +152,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
        work->nr_pages  = nr_pages;
        work->range_cyclic = range_cyclic;
        work->reason    = reason;
+       work->ub        = ub;
 
        bdi_queue_work(bdi, work);
 }
@@ -170,7 +172,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
                        enum wb_reason reason)
 {
-       __bdi_start_writeback(bdi, nr_pages, true, reason);
+       __bdi_start_writeback(bdi, nr_pages, NULL, true, reason);
 }
 
 /**
@@ -727,6 +729,7 @@ static long writeback_sb_inodes(struct super_block *sb,
                        continue;
                }
                if ((work->ub || work->filter_ub) &&
+                   ((inode->i_state & I_DIRTY) == I_DIRTY_PAGES) &&
                     ub_should_skip_writeback(work->ub, inode)) {
                        spin_unlock(&inode->i_lock);
                        redirty_tail(inode, wb);
@@ -1148,7 +1151,8 @@ void bdi_writeback_workfn(struct work_struct *work)
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
  */
-void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
+void wakeup_flusher_threads(long nr_pages, struct user_beancounter *ub,
+                       enum wb_reason reason)
 {
        struct backing_dev_info *bdi;
 
@@ -1159,7 +1163,7 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
        list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
                if (!bdi_has_dirty_io(bdi))
                        continue;
-               __bdi_start_writeback(bdi, nr_pages, false, reason);
+               __bdi_start_writeback(bdi, nr_pages, ub, false, reason);
        }
        rcu_read_unlock();
 }
@@ -1374,7 +1378,7 @@ out_unlock_inode:
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
-static void wait_sb_inodes(struct super_block *sb)
+static void wait_sb_inodes(struct super_block *sb, struct user_beancounter *ub)
 {
        struct inode *inode, *old_inode = NULL;
 
@@ -1402,6 +1406,12 @@ static void wait_sb_inodes(struct super_block *sb)
                        spin_unlock(&inode->i_lock);
                        continue;
                }
+               if (ub && (mapping->dirtied_ub != ub) &&
+                   ((inode->i_state & I_DIRTY) == I_DIRTY_PAGES)) {
+                       spin_unlock(&inode->i_lock);
+                       continue;
+               }
+
                __iget(inode);
                spin_unlock(&inode->i_lock);
                spin_unlock(&inode_sb_list_lock);
@@ -1427,17 +1437,8 @@ static void wait_sb_inodes(struct super_block *sb)
        iput(old_inode);
 }
 
-/**
- * writeback_inodes_sb_nr -    writeback dirty inodes from given super_block
- * @sb: the superblock
- * @nr: the number of pages to write
- * @reason: reason why some writeback work initiated
- *
- * Start writeback on some inodes on this super_block. No guarantees are made
- * on how many (if any) will be written, and this function does not wait
- * for IO completion of submitted IO.
- */
-void writeback_inodes_sb_nr(struct super_block *sb,
+static void writeback_inodes_sb_ub_nr(struct super_block *sb,
+                           struct user_beancounter *ub,
                            unsigned long nr,
                            enum wb_reason reason)
 {
@@ -1449,6 +1450,7 @@ void writeback_inodes_sb_nr(struct super_block *sb,
                .done                   = &done,
                .nr_pages               = nr,
                .reason                 = reason,
+               .ub                     = ub,
        };
 
        if (sb->s_bdi == &noop_backing_dev_info)
@@ -1457,8 +1459,32 @@ void writeback_inodes_sb_nr(struct super_block *sb,
        bdi_queue_work(sb->s_bdi, &work);
        wait_for_completion(&done);
 }
+
+/**
+ * writeback_inodes_sb_nr -    writeback dirty inodes from given super_block
+ * @sb: the superblock
+ * @nr: the number of pages to write
+ * @reason: reason why some writeback work initiated
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO.
+ */
+void writeback_inodes_sb_nr(struct super_block *sb,
+                           unsigned long nr,
+                           enum wb_reason reason)
+{
+
+       writeback_inodes_sb_ub_nr(sb, NULL, nr, reason);
+}
 EXPORT_SYMBOL(writeback_inodes_sb_nr);
 
+void writeback_inodes_sb_ub(struct super_block *sb, struct user_beancounter *ub,
+                       enum wb_reason reason)
+{
+       return writeback_inodes_sb_ub_nr(sb, ub, get_nr_dirty_pages(), reason);
+}
+
 /**
  * writeback_inodes_sb -       writeback dirty inodes from given super_block
  * @sb: the superblock
@@ -1513,14 +1539,7 @@ int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
 }
 EXPORT_SYMBOL(try_to_writeback_inodes_sb);
 
-/**
- * sync_inodes_sb      -       sync sb inode pages
- * @sb: the superblock
- *
- * This function writes and waits on any dirty inode belonging to this
- * super_block.
- */
-void sync_inodes_sb(struct super_block *sb)
+void sync_inodes_sb_ub(struct super_block *sb, struct user_beancounter *ub)
 {
        DECLARE_COMPLETION_ONSTACK(done);
        struct wb_writeback_work work = {
@@ -1531,6 +1550,7 @@ void sync_inodes_sb(struct super_block *sb)
                .done           = &done,
                .reason         = WB_REASON_SYNC,
                .for_sync       = 1,
+               .ub             = ub,
        };
 
        /* Nothing to do? */
@@ -1541,7 +1561,19 @@ void sync_inodes_sb(struct super_block *sb)
        bdi_queue_work(sb->s_bdi, &work);
        wait_for_completion(&done);
 
-       wait_sb_inodes(sb);
+       wait_sb_inodes(sb, ub);
+}
+
+/**
+ * sync_inodes_sb      -       sync sb inode pages
+ * @sb: the superblock
+ *
+ * This function writes and waits on any dirty inode belonging to this
+ * super_block.
+ */
+void sync_inodes_sb(struct super_block *sb)
+{
+       sync_inodes_sb_ub(sb, NULL);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
 
diff --git a/fs/sync.c b/fs/sync.c
index ba033c1..bef5163 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -35,9 +35,9 @@ static int __sync_filesystem(struct super_block *sb,
                             struct user_beancounter *ub, int wait)
 {
        if (wait)
-               sync_inodes_sb(sb);
+               sync_inodes_sb_ub(sb, ub);
        else
-               writeback_inodes_sb(sb, WB_REASON_SYNC);
+               writeback_inodes_sb_ub(sb, ub, WB_REASON_SYNC);
 
        if (sb->s_op->sync_fs)
                sb->s_op->sync_fs(sb, wait);
@@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
 static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 {
        if (!(sb->s_flags & MS_RDONLY))
-               sync_inodes_sb(sb);
+               sync_inodes_sb_ub(sb, (struct user_beancounter *)arg);
 }
 
 static void sync_fs_one_sb(struct super_block *sb, void *arg)
@@ -133,7 +133,7 @@ int ve_fsync_behavior(void)
 SYSCALL_DEFINE0(sync)
 {
        struct ve_struct *ve = get_exec_env();
-       struct user_beancounter *ub;
+       struct user_beancounter *ub, *sync_ub = NULL;
        int nowait = 0, wait = 1;
 
        ub = get_exec_ub();
@@ -154,15 +154,18 @@ SYSCALL_DEFINE0(sync)
                fsb = __ve_fsync_behavior(ve);
                if (fsb == FSYNC_NEVER)
                        goto skip;
+
+               if (fsb == FSYNC_FILTERED)
+                       sync_ub = get_io_ub();
        }
 
-       wakeup_flusher_threads(0, WB_REASON_SYNC);
-       iterate_supers(sync_inodes_one_sb, NULL);
+       wakeup_flusher_threads(0, ub, WB_REASON_SYNC);
+       iterate_supers(sync_inodes_one_sb, sync_ub);
        iterate_supers(sync_fs_one_sb, &nowait);
        iterate_supers(sync_fs_one_sb, &wait);
        iterate_bdevs(fdatawrite_one_bdev, NULL);
        iterate_bdevs(fdatawait_one_bdev, NULL);
-       if (unlikely(laptop_mode))
+       if (unlikely(laptop_mode) && !sync_ub)
                laptop_sync_completion();
 skip:
        ub_percpu_inc(ub, sync_done);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a193a7e..2337227 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -93,13 +93,17 @@ struct writeback_control {
 struct bdi_writeback;
 int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
+void writeback_inodes_sb_ub(struct super_block *, struct user_beancounter *,
+                                                       enum wb_reason reason);
 void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
                                                        enum wb_reason reason);
 int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
                                  enum wb_reason reason);
 void sync_inodes_sb(struct super_block *);
-void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
+void sync_inodes_sb_ub(struct super_block *, struct user_beancounter *ub);
+void wakeup_flusher_threads(long nr_pages, struct user_beancounter *ub,
+                               enum wb_reason reason);
 void inode_wait_for_writeback(struct inode *inode);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0406c11..9611251 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2619,7 +2619,7 @@ retry:
                writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
                if (total_scanned > writeback_threshold) {
                        wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
-                                               WB_REASON_TRY_TO_FREE_PAGES);
+                                               NULL, WB_REASON_TRY_TO_FREE_PAGES);
                        sc->may_writepage = 1;
                }
 
-- 
2.4.10

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to