The cachestat() syscall reads page cache statistics straight from the
file's f_mapping. Stackable filesystems such as overlayfs keep the data
pages in an underlying inode's mapping rather than in the overlay
inode's, so cachestat() reports all zeroes for them.

Add a ->cachestat() file operation and route the syscall through a new
vfs_cachestat() helper that calls it when present, falling back to the
file's f_mapping otherwise. This lets stackable filesystems forward the
query to the file that actually owns the page cache. No behaviour change
for regular files.

Signed-off-by: Pavel Tikhomirov <[email protected]>
---
Note: The memset change might be a bit tricky. I moved it to the path
taken when there is no ->cachestat() to avoid multiple memsets on nested
overlayfs, which means that ->cachestat() is expected to be able to
handle an uninitialized cs.
---
 include/linux/fs.h | 10 ++++++++++
 mm/filemap.c       | 43 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6da44573ce450..966b6564707e4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -53,6 +53,8 @@
 
 struct bdi_writeback;
 struct bio;
+struct cachestat_range;
+struct cachestat;
 struct io_comp_batch;
 struct fiemap_extent_info;
 struct kiocb;
@@ -1963,6 +1965,8 @@ struct file_operations {
                                   struct file *file_out, loff_t pos_out,
                                   loff_t len, unsigned int remap_flags);
        int (*fadvise)(struct file *, loff_t, loff_t, int);
+       int (*cachestat)(struct file *file, struct cachestat_range *csr,
+                        struct cachestat *cs);
        int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
        int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
                                unsigned int poll_flags);
@@ -3633,6 +3637,12 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
 extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
                           int advice);
 
+/* mm/filemap.c */
+#ifdef CONFIG_CACHESTAT_SYSCALL
+int vfs_cachestat(struct file *file, struct cachestat_range *csr,
+                 struct cachestat *cs);
+#endif
+
 static inline bool vfs_empty_path(int dfd, const char __user *path)
 {
        char c;
diff --git a/mm/filemap.c b/mm/filemap.c
index 7e467c81d2138..90608c6b1ce55 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -4714,6 +4714,37 @@ static inline bool can_do_cachestat(struct file *f)
        return file_permission(f, MAY_WRITE) == 0;
 }
 
+/**
+ * vfs_cachestat() - query page cache statistics of a file
+ * @file:      file to query
+ * @csr:       byte range to query
+ * @cs:                output statistics
+ *
+ * Compute the page cache statistics for the given byte range of @file.
+ *
+ * Stackable filesystems (e.g. overlayfs) keep the data pages in the
+ * mapping of an underlying file rather than in @file->f_mapping. Such
+ * filesystems provide a ->cachestat() file operation that forwards the
+ * query to the file that actually owns the page cache; otherwise the
+ * statistics are computed from @file->f_mapping directly.
+ */
+int vfs_cachestat(struct file *file, struct cachestat_range *csr,
+                 struct cachestat *cs)
+{
+       pgoff_t first_index, last_index;
+
+       if (file->f_op->cachestat)
+               return file->f_op->cachestat(file, csr, cs);
+
+       first_index = csr->off >> PAGE_SHIFT;
+       last_index =
+               csr->len == 0 ? ULONG_MAX : (csr->off + csr->len - 1) >> PAGE_SHIFT;
+       memset(cs, 0, sizeof(struct cachestat));
+       filemap_cachestat(file->f_mapping, first_index, last_index, cs);
+       return 0;
+}
+EXPORT_SYMBOL(vfs_cachestat);
+
 /*
  * The cachestat(2) system call.
  *
@@ -4753,10 +4784,9 @@ SYSCALL_DEFINE4(cachestat, unsigned int, fd,
                struct cachestat __user *, cstat, unsigned int, flags)
 {
        CLASS(fd, f)(fd);
-       struct address_space *mapping;
        struct cachestat_range csr;
        struct cachestat cs;
-       pgoff_t first_index, last_index;
+       int ret;
 
        if (fd_empty(f))
                return -EBADF;
@@ -4775,12 +4805,9 @@ SYSCALL_DEFINE4(cachestat, unsigned int, fd,
        if (flags != 0)
                return -EINVAL;
 
-       first_index = csr.off >> PAGE_SHIFT;
-       last_index =
-               csr.len == 0 ? ULONG_MAX : (csr.off + csr.len - 1) >> PAGE_SHIFT;
-       memset(&cs, 0, sizeof(struct cachestat));
-       mapping = fd_file(f)->f_mapping;
-       filemap_cachestat(mapping, first_index, last_index, &cs);
+       ret = vfs_cachestat(fd_file(f), &csr, &cs);
+       if (ret)
+               return ret;
 
        if (copy_to_user(cstat, &cs, sizeof(struct cachestat)))
                return -EFAULT;
-- 
2.54.0


Reply via email to