Hi all, Now that the previous patch has been merged (the one that made ext2fs use the newly added bulk page write for FILE type pagers), here is the matching implementation for the DISK path, which should cover all the remaining writes ext2fs does.
Let me know your thoughts. Regards, Milos
From df73441edfe40faef9898ac0cca993c7902a53a0 Mon Sep 17 00:00:00 2001 From: Milos Nikic <[email protected]> Date: Tue, 2 Sep 2025 17:50:47 -0700 Subject: [PATCH] libpager, ext2fs: add bulk page write support for DISK type too. Following FILE type where we sped up the ext2fs file write by about 25x now we do the same for DISK type. --- ext2fs/pager.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 5 deletions(-) diff --git a/ext2fs/pager.c b/ext2fs/pager.c index a7801bea..624471ad 100644 --- a/ext2fs/pager.c +++ b/ext2fs/pager.c @@ -510,7 +510,129 @@ file_pager_write_pages (struct node *node, return err; } -/* Strong override: only FILE_DATA uses bulk; others keep per-page path. */ +/* Bulk write for the DISK pager: write LENGTH bytes from BUF, covering + pager offsets [OFFSET, OFFSET + LENGTH). Blocks are enumerated one by + one into strictly-consecutive runs of at most EXT2_BULK_MAX_BLOCKS; each + run is flushed with pending_blocks_write(). Returns 0 or the first error. + If WRITTEN is non-NULL it receives the page-aligned count of bytes from + runs flushed successfully; a run whose flush fails is not counted. */ +static error_t +disk_pager_write_pages (vm_offset_t offset, + vm_address_t buf, + vm_size_t length, vm_size_t *written) +{ + error_t err = 0; + vm_size_t done = 0; /* bytes of BUF flushed so far */ + const unsigned max_blocks = EXT2_BULK_MAX_BLOCKS; + const store_offset_t dev_end = store->size; /* device size in bytes */ + + if (written) + *written = 0; + + while (done < length) + { + vm_size_t left = length - done; + + /* Build exactly one consecutive run (up to max_blocks). 
*/ + struct pending_blocks pb; + vm_size_t built = 0; /* bytes added to this run */ + vm_size_t blocks_built = 0; /* FS blocks added to this run */ + block_t prev = 0; /* previous device block; valid once blocks_built > 0 */ + + pending_blocks_init (&pb, (void *) (buf + done)); + + while (blocks_built < max_blocks && built < left) + { + vm_offset_t voff = offset + done + built; + + /* Translate voff -> device block using disk_cache_info. */ + int index = voff >> log2_block_size; + + pthread_mutex_lock (&disk_cache_lock); + assert_backtrace (disk_cache_info[index].block != DC_NO_BLOCK); + + store_offset_t dev_off = + ((store_offset_t) disk_cache_info[index].block << log2_block_size) + + (voff % block_size); +#ifdef DEBUG_DISK_CACHE + assert_backtrace ((disk_cache_info[index]. + last_read ^ DISK_CACHE_LAST_READ_XOR) == + disk_cache_info[index].last_read_xor); + assert_backtrace (disk_cache_info[index].last_read == + disk_cache_info[index].block); +#endif + pthread_mutex_unlock (&disk_cache_lock); + + /* At/past end of device: fail; the partial run is not flushed. */ + if (dev_off >= dev_end) + { + err = ENOSPC; + break; + } + + block_t block = boffs_block (dev_off); + + /* Stop at the first non-consecutive block (don't consume it). */ + if (blocks_built > 0 && block != prev + 1) + break; + prev = block; + + if (modified_global_blocks) + { + /* Avoid overspray when FS block < page size. */ + if (test_bit (block, modified_global_blocks)) + err = pending_blocks_add (&pb, block); + else + err = pending_blocks_skip (&pb); + } + else + { + /* No mask: enqueue every block; writer will coalesce contiguous LBAs. */ + err = pending_blocks_add (&pb, block); + } + + if (err) + break; + + built += block_size; + blocks_built++; + } + + /* Maintain invariant: either we made progress, or it's an error. */ + if (err) + break; + if (blocks_built == 0) + { + err = ENOSPC; + break; + } + + /* Flush this consecutive run (collapses to few large store_writes). 
*/ + error_t werr = pending_blocks_write (&pb); + if (werr) + { + err = werr; + break; + } + + /* The flush succeeded: only now count this run as progress. */ + done += built; + } + + if (written) + { + vm_size_t w = done; + if (w > length) + w = length; + /* libpager expects page-aligned progress. */ + w -= (w % vm_page_size); + *written = w; + } + + return err; +} + +/* Strong override: FILE_DATA and DISK use bulk; others keep per-page path. */ error_t pager_write_pages (struct user_pager_info *pager, vm_offset_t offset, @@ -518,11 +640,17 @@ pager_write_pages (struct user_pager_info *pager, vm_size_t length, vm_size_t *written) { - /* libpager will just hand this off to the pager_write_page. */ - if (pager->type != FILE_DATA) - return EOPNOTSUPP; + switch (pager->type) + { + case FILE_DATA: + return file_pager_write_pages (pager->node, offset, data, length, written); - return file_pager_write_pages (pager->node, offset, data, length, written); + case DISK: + return disk_pager_write_pages (offset, data, length, written); + + default: + return EOPNOTSUPP; + } } -- 2.51.0
