Package: e2fsprogs Version: 1.47.2~rc1-2~bpo12+2 Severity: normal Dear Maintainer,
when doing rm of a very large file (or possibly just a file whose block extends past 2^32-1) it fails with FUSE2FS-remove_inode: put ino=12 links=1 fuse: bad error value: 75 unique: 16, error: -34 (Numerical result out of range), outsize: 16 rm: cannot remove 'mount/filler': Numerical result out of range rm in debugfs does work. Attached patch replaces the deallocate block logic with that from debugfs. This possibly points to a bug in the underlying ext2_punch routine that fuse2fs is using but I haven't tried to investigate further. This function is also called from punch_helper so I suspect that using fallocate to punch holes might also have problems. Four files are attached: rm-fix.patch - the actual fix for this bug test.sh - a short test script to trigger this bug inusefile.patch - adds support for the -o inusefile= flag that the test uses. If you don't apply this patch then you'll need to add a large sleep after the fusermount -u calls instead (c50s is required on my system) lseek.patch - irrelevant to this fix but the other two patches are built on it and will (probably) apply but with line offsets. This adds support for SEEK_HOLE and SEEK_DATA. I intend to send this upstream eventually but I've got no time to work on writing any tests right now - but it's working perfectly for my usecase. N.B. The test takes around 10-15 minutes to run, most of the time is in the rm and requires c1.5GB of disk space. It will not clean up properly if it fails part way through. Tim. -- System Information: Debian Release: 12.11 APT prefers stable-security APT policy: (500, 'stable-security'), (500, 'stable') Architecture: amd64 (x86_64) Kernel: Linux 6.1.0-35-amd64 (SMP w/4 CPU threads; PREEMPT) Kernel taint flags: TAINT_WARN Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE not set Shell: /bin/sh linked to /usr/bin/dash Init: sysvinit (via /sbin/init) Versions of packages e2fsprogs depends on: ii libblkid1 2.38.1-5+deb12u3 ii libc6 2.36-9+deb12u10 ii libcom-err2 1.47.0-2 ii libext2fs2 1.47.2~rc1-2~bpo12+2 ii libss2 1.47.0-2 ii libuuid1 2.38.1-5+deb12u3 ii logsave 1.47.0-2 Versions of packages e2fsprogs recommends: pn e2fsprogs-l10n <none> Versions of packages e2fsprogs suggests: pn e2fsck-static <none> ii fuse2fs 1.47.2~rc1-2~bpo12+2 pn gpart <none> pn parted <none> -- no debconf information
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c e2fsprogs-1.47.2~rc1/misc/fuse2fs.c --- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000 +++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000 @@ -1237,6 +1237,25 @@ return update_mtime(fs, dir, NULL); } +static int release_blocks_proc(ext2_filsys fs, blk64_t *blocknr, + e2_blkcnt_t blockcnt EXT2FS_ATTR((unused)), + blk64_t ref_block EXT2FS_ATTR((unused)), + int ref_offset EXT2FS_ATTR((unused)), + void *private) +{ + blk64_t block = *blocknr; + blk64_t *last_cluster = (blk64_t *)private; + blk64_t cluster = EXT2FS_B2C(fs, block); + + if (cluster == *last_cluster) + return 0; + + *last_cluster = cluster; + + ext2fs_block_alloc_stats2(fs, block, -1); + return 0; +} + static int remove_inode(struct fuse2fs *ff, ext2_ino_t ino) { ext2_filsys fs = ff->fs; @@ -1278,8 +1297,11 @@ goto write_out; if (ext2fs_inode_has_valid_blocks2(fs, (struct ext2_inode *)&inode)) { - err = ext2fs_punch(fs, ino, (struct ext2_inode *)&inode, NULL, - 0, ~0ULL); + blk64_t last_cluster = 0; + ext2fs_block_iterate3(fs, ino, BLOCK_FLAG_READ_ONLY, + NULL, release_blocks_proc, &last_cluster); +// err = ext2fs_punch(fs, ino, (struct ext2_inode *)&inode, NULL, +// 0, ~0ULL); if (err) { ret = translate_error(fs, ino, err); goto write_out;
#!/bin/bash set -e ROOT=mount DEV=container_mount/pv1 rm -f container rm -fr container_mount rm -fr mount mkdir -p container_mount # create a container fs that can hold a 5T sparse file truncate -s 3G container /sbin/mke2fs -t ext4 -O \ none,has_journal,ext_attr,dir_index,filetype,extent,64bit,flex_bg,sparse_super,large_file,huge_file,dir_nlink,extra_isize,metadata_csum \ -b 4096 container fuse2fs -o fakeroot -o inusefile=container.inuse container container_mount mkdir -p "$ROOT" echo "truncate $(date)" time truncate -s 5T "${DEV}" echo "mke2fs $(date)" time /sbin/mkfs.ext4 -N 1000000 -O \ none,has_journal,ext_attr,dir_index,filetype,extent,64bit,flex_bg,sparse_super,large_file,huge_file,dir_nlink,extra_isize,metadata_csum \ -b 1024 "$DEV" echo "fuse2fs ${DEV} ${ROOT} $(date)" time fuse2fs -o fakeroot -o inusefile="$DEV.inuse" "$DEV" "$ROOT" echo "make filler $(date) - this is slow" time fallocate -l 4294967295K "${ROOT}/filler" echo "fusermount -u $ROOT $(date)" time fusermount -u "$ROOT" while [[ -f "${DEV}.inuse" ]]; do sleep 10 echo "Waiting for fuse to complete" done echo "fuse2fs ${DEV} ${ROOT} $(date)" time fuse2fs -o fakeroot -o inusefile="$DEV.inuse" "$DEV" "$ROOT" echo "rm filler $(date) - this is slow" time rm "${ROOT}/filler" echo "fusermount -u $ROOT $(date)" time fusermount -u "$ROOT" while [[ -f "${DEV}.inuse" ]]; do sleep 10 echo "Waiting for fuse to complete" done fusermount -u container_mount while [[ -f container.inuse ]]; do echo "Waiting for container fuse" sleep 10 done rm container rmdir $ROOT rmdir container_mount exit 0
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c e2fsprogs-1.47.2~rc1/misc/fuse2fs.c --- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000 +++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000 @@ -348,6 +348,7 @@ unsigned long offset; FILE *err_fp; unsigned int next_generation; + char* inusefile; }; #define FUSE2FS_CHECK_MAGIC(fs, ptr, num) do {if ((ptr)->magic != (num)) \ @@ -3873,6 +3874,7 @@ FUSE2FS_OPT("no_default_opts", no_default_opts, 1), FUSE2FS_OPT("norecovery", norecovery, 1), FUSE2FS_OPT("offset=%lu", offset, 0), + FUSE2FS_OPT("inusefile=%s", inusefile, 0), FUSE_OPT_KEY("-V", FUSE2FS_VERSION), FUSE_OPT_KEY("--version", FUSE2FS_VERSION), @@ -3914,6 +3916,7 @@ " -o offset=<bytes> similar to mount -o offset=<bytes>, mount the partition starting at <bytes>\n" " -o norecovery don't replay the journal (implies ro)\n" " -o fuse2fs_debug enable fuse2fs debugging\n" + " -o inusefile=<file> file to show that fuse is still using the file system image\n" "\n", outargs->argv[0]); if (key == FUSE2FS_HELPFULL) { @@ -3987,6 +3990,24 @@ fctx.alloc_all_blocks = 1; } + if(fctx.inusefile) { + FILE* inusefile=fopen(fctx.inusefile, "w"); + if(!inusefile) { + fprintf(stderr, "Requested inusefile=%s but couldn't open the file for writing\n", fctx.inusefile); + exit(1); + } + fclose(inusefile); + char* resolved = realpath(fctx.inusefile, NULL); + if (!resolved) { + perror("realpath"); + fprintf(stderr, "Could not resolve realpath for inusefile=%s\n", fctx.inusefile); + unlink(fctx.inusefile); + exit(1); + } + free(fctx.inusefile); + fctx.inusefile = resolved; + } + /* Start up the fs (while we still can use stdout) */ ret = 2; if (!fctx.ro) @@ -4107,6 +4128,11 @@ com_err(argv[0], err, "while closing fs"); global_fs = NULL; } + if(fctx.inusefile) { + err = unlink(fctx.inusefile); + if (err) + com_err(argv[0], "unlink: %s while unlinking '%s'", strerror(errno), fctx.inusefile); + } return ret; }
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c e2fsprogs-1.47.2~rc1/misc/fuse2fs.c --- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000 +++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000 @@ -2040,6 +2040,147 @@ return ret; } +struct block_context { + e2_blkcnt_t next_block; + off_t blksize; + off_t offset; + off_t pos; + off_t next_hole; + off_t next_data; +}; + +static int +dumponeblock(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt, + blk64_t ref_block, int ref_offset, void * privdata) +{ + struct block_context *p; + e2_blkcnt_t i; + + p = (struct block_context *)privdata; + printf("p->pos = %ld p->offset=%ld blockcnt=%lld\n", p->pos, p->offset, blockcnt); + + // Stepping over a hole + e2_blkcnt_t holesize = blockcnt - p->next_block; + if (p->pos <= p->offset && p->pos + holesize * p->blksize > p->offset) { + // offset is in this hole + p->next_hole = p->offset; + } else if (p->pos > p->offset && p->pos < p->next_hole) { + // First hole after offset + p->next_hole = p->pos; + } + p->pos += p->blksize * holesize; + + // A data block + p->next_block = blockcnt + 1; + if (p->pos <= p->offset && p->pos + p->blksize > p->offset) { + // offset is in this data block + p->next_data = p->offset; + } else if (p->pos > p->offset && p->pos < p->next_data) { + // first data block after offset + p->next_data = p->pos; + } + p->pos += p->blksize; + return 0; +} + + +static off_t op_lseek(const char* path, off_t offset, int whence, struct fuse_file_info *fp) +{ + struct fuse_context *ctxt = fuse_get_context(); + struct fuse2fs *ff = (struct fuse2fs *)ctxt->private_data; + struct fuse2fs_file_handle *fh = + (struct fuse2fs_file_handle *)(uintptr_t)fp->fh; + ext2_filsys fs; + struct ext2_inode_large inode; + blk64_t start, end; + __u64 i_size; + errcode_t err; + int flags; + + FUSE2FS_CHECK_CONTEXT(ff); + fs = ff->fs; + FUSE2FS_CHECK_MAGIC(fs, fh, FUSE2FS_FILE_MAGIC); + + memset(&inode, 0, sizeof(inode)); + err = ext2fs_read_inode_full(fs, fh->ino, (struct ext2_inode *)&inode, + sizeof(inode)); + if (err) + return err; + i_size = EXT2_I_SIZE(&inode); + + if (offset >= i_size) + return -ENXIO; + + struct block_context bc = { + .next_block = 0, + .blksize = fs->blocksize, + .offset = offset, + .pos = 0, + .next_hole = i_size, + .next_data = i_size, + }; + + if (inode.i_mode & S_IFREG && inode.i_flags & EXT4_EXTENTS_FL) { + ext2_extent_handle_t handle = NULL; + struct ext2fs_extent extent; + int op = EXT2_EXTENT_ROOT; + + err = ext2fs_extent_open(fs, fh->ino, &handle); + if (err) { + // Why doesn't op_create do this? + err = translate_error(fs, fh->ino, err); + return err; + } + while (1) { + err = ext2fs_extent_get(handle, op, &extent); + if (err == EXT2_ET_EXTENT_NO_NEXT) + break; + if (err) { + err = translate_error(fs, fh->ino, err); + ext2fs_extent_free(handle); + return err; + } + op = EXT2_EXTENT_NEXT; + + if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT) { + continue; + } + if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)) { + continue; + } + + blk64_t start = extent.e_pblk; + e2_blkcnt_t blockcnt = extent.e_lblk; + for(blk64_t blocknr = start; blocknr < start + extent.e_len; ++blocknr, ++blockcnt) { + // TODO We can be much more efficient here + dumponeblock(fs, &blocknr, blockcnt, 0, 0, &bc); + } + } + ext2fs_extent_free(handle); + } else if (inode.i_mode & S_IFREG && inode.i_flags & EXT4_INLINE_DATA_FL) { + if (whence == SEEK_DATA) { + return offset; + } else { + return i_size; + } + } else { + ext2fs_block_iterate3(fs, fh->ino, BLOCK_FLAG_DATA_ONLY, NULL, dumponeblock, &bc); + } + + /* deal with holes at the end of the inode */ + if (i_size > bc.pos) { + if (bc.next_hole == i_size) + bc.next_hole = bc.pos > bc.offset ? bc.pos : bc.offset; + } + + if (whence == SEEK_DATA) { + if (bc.next_data == i_size) return -ENXIO; + return bc.next_data; + } else { + return bc.next_hole; + } +} + static int op_truncate(const char *path, off_t len #if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) , struct fuse_file_info *fi EXT2FS_ATTR((unused)) @@ -3695,6 +3836,7 @@ .fallocate = op_fallocate, # endif #endif + .lseek = op_lseek, }; static int get_random_bytes(void *p, size_t sz)