The cachestat() syscall reads page cache statistics straight from the file's f_mapping. Stackable filesystems such as overlayfs keep the data pages in an underlying inode's mapping rather than in the overlay inode's, so cachestat() reports all zeroes for them.
Add a ->cachestat() file operation and route the syscall through a new vfs_cachestat() helper that calls it when present, falling back to the file's f_mapping otherwise. This lets stackable filesystems forward the query to the file that actually owns the page cache. No behaviour change for regular files. Signed-off-by: Pavel Tikhomirov <[email protected]> --- Note: The memset change might be a bit tricky: I moved it to the path without ->cachestat() to avoid multiple memsets on nested overlayfs, which means that ->cachestat() implementations are expected to handle an uninitialized cs. --- include/linux/fs.h | 10 ++++++++++ mm/filemap.c | 43 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6da44573ce450..966b6564707e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -53,6 +53,8 @@ struct bdi_writeback; struct bio; +struct cachestat_range; +struct cachestat; struct io_comp_batch; struct fiemap_extent_info; struct kiocb; @@ -1963,6 +1965,8 @@ struct file_operations { struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); + int (*cachestat)(struct file *file, struct cachestat_range *csr, + struct cachestat *cs); int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); @@ -3633,6 +3637,12 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); +/* mm/filemap.c */ +#ifdef CONFIG_CACHESTAT_SYSCALL +int vfs_cachestat(struct file *file, struct cachestat_range *csr, + struct cachestat *cs); +#endif + static inline bool vfs_empty_path(int dfd, const char __user *path) { char c; diff --git a/mm/filemap.c b/mm/filemap.c index 7e467c81d2138..90608c6b1ce55 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4714,6 
+4714,37 @@ static inline bool can_do_cachestat(struct file *f) return file_permission(f, MAY_WRITE) == 0; } +/** + * vfs_cachestat() - query page cache statistics of a file + * @file: file to query + * @csr: byte range to query + * @cs: output statistics + * + * Compute the page cache statistics for the given byte range of @file. + * + * Stackable filesystems (e.g. overlayfs) keep the data pages in the + * mapping of an underlying file rather than in @file->f_mapping. Such + * filesystems provide a ->cachestat() file operation that forwards the + * query to the file that actually owns the page cache; otherwise the + * statistics are computed from @file->f_mapping directly. + */ +int vfs_cachestat(struct file *file, struct cachestat_range *csr, + struct cachestat *cs) +{ + pgoff_t first_index, last_index; + + if (file->f_op->cachestat) + return file->f_op->cachestat(file, csr, cs); + + first_index = csr->off >> PAGE_SHIFT; + last_index = + csr->len == 0 ? ULONG_MAX : (csr->off + csr->len - 1) >> PAGE_SHIFT; + memset(cs, 0, sizeof(struct cachestat)); + filemap_cachestat(file->f_mapping, first_index, last_index, cs); + return 0; +} +EXPORT_SYMBOL(vfs_cachestat); + /* * The cachestat(2) system call. * @@ -4753,10 +4784,9 @@ SYSCALL_DEFINE4(cachestat, unsigned int, fd, struct cachestat __user *, cstat, unsigned int, flags) { CLASS(fd, f)(fd); - struct address_space *mapping; struct cachestat_range csr; struct cachestat cs; - pgoff_t first_index, last_index; + int ret; if (fd_empty(f)) return -EBADF; @@ -4775,12 +4805,9 @@ SYSCALL_DEFINE4(cachestat, unsigned int, fd, if (flags != 0) return -EINVAL; - first_index = csr.off >> PAGE_SHIFT; - last_index = - csr.len == 0 ? 
ULONG_MAX : (csr.off + csr.len - 1) >> PAGE_SHIFT; - memset(&cs, 0, sizeof(struct cachestat)); - mapping = fd_file(f)->f_mapping; - filemap_cachestat(mapping, first_index, last_index, &cs); + ret = vfs_cachestat(fd_file(f), &csr, &cs); + if (ret) + return ret; if (copy_to_user(cstat, &cs, sizeof(struct cachestat))) return -EFAULT; -- 2.54.0

