Hi Luis,
On Sat, 20 Mar 2021 01:00:45 +0100, David Disseldorp wrote:
> > As I've already told you somewhere else, copy_file_range (crf for short!)
> > implementations may expand any existing holes in the original file.
> > That's OK, I guess, but it may be worth adding a comment here and possibly
> > on the cpio(1) manpage, when documenting the --reflink option.
> >
> > Another option would be to add a bit more complexity here by calling crf
> > in a loop and lseek'ing SEEK_DATA/SEEK_HOLE. That's what my original RFC
> > patch was doing, but I agree that it's probably not worth it.
> >
> > My suggestion is to, at least, add here a small comment mentioning this
> > decision and referring to the cfr manpage for the details.
>
> It's still a little unclear to me under what condition hole expansion
> can occur (splice only, or also in some reflink cases, etc.). I'll add
> back the SEEK_DATA / SEEK_HOLE logic that you had, as it should at least
> speed up processing of Dracut padding entries.
As mentioned in the v2 patchset description:
+ I decided against adding back SEEK_DATA / SEEK_HOLE functionality to
the copy_files_range loop, as seek cursor tracking and error
handling add significant complexity with little benefit to my
initramfs generation workload
I've attached the RFC patch (applies atop V2) that I have for it, just
in case you're curious how ugly it gets ;-)
Cheers, David
>From e8e15b533a7d4bc61120a987ee5b653be4fe1534 Mon Sep 17 00:00:00 2001
From: David Disseldorp <[email protected]>
Date: Mon, 12 Apr 2021 19:41:52 +0200
Subject: [PATCH] RFC SQUASH seek past holes in copy_file_range loop
Use SEEK_DATA and SEEK_HOLE to skip over sparse ranges when performing
copy_file_range copy-out I/O.
XXX this significantly complicates error handling:
- the input file SEEK_DATA/_HOLE may move cursor past num_bytes
+ set cursor back to initial_offset + num_bytes on exit
+ error handling of unexpected seek errors is messy
- I've added error() exit cases for now
- output file seek cursor needs to be kept up to date in case of
copy_file_range() error fallback
---
src/util.c | 56 +++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 43 insertions(+), 13 deletions(-)
diff --git a/src/util.c b/src/util.c
index 4c0d75f..e5b70a1 100644
--- a/src/util.c
+++ b/src/util.c
@@ -523,13 +523,18 @@ copy_files_disk_to_tape (int in_des, int out_des, off_t num_bytes,
* descriptors. Copy-on-write enabled filesystems may optimize I/O by using
* reflinks.
*/
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
ssize_t
copy_files_range (int in_des, int out_des, off_t num_bytes)
{
- loff_t in_off = 0, out_off = 0;
- ssize_t total = 0;
+ loff_t in_off, out_off;
+ loff_t final_in_off, final_out_off;
+ off_t remaining = num_bytes;
ssize_t ret;
+ if (num_bytes == 0)
+ return 0;
+
in_off = lseek (in_des, 0, SEEK_CUR);
if (in_off < 0)
return -errno;
@@ -538,31 +543,56 @@ copy_files_range (int in_des, int out_des, off_t num_bytes)
if (out_off < 0)
return -errno;
- while (total < num_bytes)
+ /* SEEK_DATA/_HOLE may seek past num_bytes, so stash end offsets */
+ final_in_off = in_off + num_bytes;
+ final_out_off = out_off + num_bytes;
+
+ while (remaining > 0)
{
+ loff_t next_start, next_end;
+ size_t this_len;
+
+ next_start = lseek (in_des, in_off, SEEK_DATA);
+ if ((next_start < 0 && errno == ENXIO)
+ || (next_start > in_off + remaining))
+ {
+ /* hole extends beyond num_bytes. seek to final offset */
+ output_bytes += remaining;
+ input_bytes += remaining;
+ break;
+ }
+ else if (next_start < 0)
+ error(1, errno, "failed src SEEK_DATA");
+
+ next_end = lseek (in_des, next_start, SEEK_HOLE);
+ if (next_end < 0)
+ error(1, errno, "failed src SEEK_HOLE");
+
+ this_len = MIN(next_end - next_start, remaining);
+ out_off += next_start - in_off; /* seek past any hole */
+ in_off = next_start;
+
ret = copy_file_range (in_des, &in_off, out_des, &out_off,
- num_bytes - total, 0);
- /* simplify read/write fallback: don't partially seek on error */
+ this_len, 0);
if (ret == 0)
return -ENODATA;
if (ret < 0)
return -errno;
- total += ret;
+
+ remaining -= ret;
+ output_bytes += ret;
+ input_bytes += ret;
}
- /* copy complete. seek to new offset */
- in_off = lseek (in_des, in_off, SEEK_SET);
+ in_off = lseek (in_des, final_in_off, SEEK_SET);
if (in_off < 0)
error(1, errno, "failed final src seek");
- out_off = lseek (out_des, out_off, SEEK_SET);
+ out_off = lseek (out_des, final_out_off, SEEK_SET);
if (out_off < 0)
error(1, errno, "failed final dst seek");
- output_bytes += total;
- input_bytes += total;
-
- return total;
+ return num_bytes;
}
#endif /* HAVE_CPIO_REFLINK */
--
2.26.2