Package: e2fsprogs
Version: 1.47.2~rc1-2~bpo12+2
Severity: normal

Dear Maintainer,

When removing (rm) a very large file (or possibly just a file whose blocks 
extend past block number 2^32-1) on a fuse2fs mount, the operation fails with:

FUSE2FS-remove_inode: put ino=12 links=1
fuse: bad error value: 75
   unique: 16, error: -34 (Numerical result out of range), outsize: 16
rm: cannot remove 'mount/filler': Numerical result out of range

rm in debugfs does work. The attached patch replaces the block 
deallocation logic in fuse2fs with that from debugfs.

This possibly points to a bug in the underlying ext2fs_punch routine that 
fuse2fs is using, but I haven't tried to investigate further. This 
function is also called from punch_helper, so I suspect that using 
fallocate to punch holes might also have problems.

Four files are attached:
rm-fix.patch - the actual fix for this bug

test.sh - a short test script to trigger this bug
inusefile.patch - adds support for the -o inusefile= option that the test 
uses. If you don't apply this patch then you'll need to add a long 
sleep after the fusermount -u calls instead (about 50s is required on my 
system).
lseek.patch - unrelated to this fix, but the other two patches were built 
on top of it; without it they will (probably) still apply, but with line 
offsets. It adds support for SEEK_HOLE and SEEK_DATA. I intend to send it 
upstream eventually, but I have no time to write any tests right now - it 
is, however, working perfectly for my use case.

N.B. The test takes around 10-15 minutes to run (most of that time is spent 
in the rm) and requires about 1.5GB of disk space. It will not clean up 
properly if it fails part way through.

Tim.

-- System Information:
Debian Release: 12.11
  APT prefers stable-security
  APT policy: (500, 'stable-security'), (500, 'stable')
Architecture: amd64 (x86_64)

Kernel: Linux 6.1.0-35-amd64 (SMP w/4 CPU threads; PREEMPT)
Kernel taint flags: TAINT_WARN
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE not set
Shell: /bin/sh linked to /usr/bin/dash
Init: sysvinit (via /sbin/init)

Versions of packages e2fsprogs depends on:
ii  libblkid1    2.38.1-5+deb12u3
ii  libc6        2.36-9+deb12u10
ii  libcom-err2  1.47.0-2
ii  libext2fs2   1.47.2~rc1-2~bpo12+2
ii  libss2       1.47.0-2
ii  libuuid1     2.38.1-5+deb12u3
ii  logsave      1.47.0-2

Versions of packages e2fsprogs recommends:
pn  e2fsprogs-l10n  <none>

Versions of packages e2fsprogs suggests:
pn  e2fsck-static  <none>
ii  fuse2fs        1.47.2~rc1-2~bpo12+2
pn  gpart          <none>
pn  parted         <none>

-- no debconf information
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 
e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
--- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c    2024-11-29 08:02:27.000000000 
+0000
+++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000
@@ -1237,6 +1237,25 @@
        return update_mtime(fs, dir, NULL);
 }
 
+/*
+ * Block iterator callback: release *blocknr unless its cluster was just freed.
+ */
+static int release_blocks_proc(ext2_filsys fs, blk64_t *blocknr,
+                              e2_blkcnt_t blockcnt EXT2FS_ATTR((unused)),
+                              blk64_t ref_block EXT2FS_ATTR((unused)),
+                              int ref_offset EXT2FS_ATTR((unused)),
+                              void *private)
+{
+       blk64_t *prev_cluster = (blk64_t *)private;
+       blk64_t this_cluster = EXT2FS_B2C(fs, *blocknr);
+
+       if (this_cluster != *prev_cluster) {
+               *prev_cluster = this_cluster;
+               ext2fs_block_alloc_stats2(fs, *blocknr, -1);
+       }
+       return 0;
+}
+
 static int remove_inode(struct fuse2fs *ff, ext2_ino_t ino)
 {
        ext2_filsys fs = ff->fs;
@@ -1278,8 +1297,11 @@
                goto write_out;
 
        if (ext2fs_inode_has_valid_blocks2(fs, (struct ext2_inode *)&inode)) {
-               err = ext2fs_punch(fs, ino, (struct ext2_inode *)&inode, NULL,
-                                  0, ~0ULL);
+               blk64_t last_cluster = 0;
+               /* ext2fs_punch() is avoided: rm of very large files failed via it */
+               err = ext2fs_block_iterate3(fs, ino, BLOCK_FLAG_READ_ONLY,
+                                           NULL, release_blocks_proc,
+                                           &last_cluster);
                if (err) {
                        ret = translate_error(fs, ino, err);
                        goto write_out;
#!/bin/bash
#
# Reproducer for the fuse2fs "rm: Numerical result out of range" failure.
#
# Builds a 1k-block ext4 image inside a sparse 5T file (itself stored in a
# fuse2fs-mounted container image), fills it with one huge fallocate'd file,
# remounts it and then removes that file.
#
# Needs fuse2fs built with the "-o inusefile=" option.  Does not clean up
# after itself if any step fails.

set -e

ROOT=mount
DEV=container_mount/pv1
FEATURES=none,has_journal,ext_attr,dir_index,filetype,extent,64bit,flex_bg,sparse_super,large_file,huge_file,dir_nlink,extra_isize,metadata_csum

# Block until fuse2fs has removed the given in-use marker file, i.e. until
# the background fuse2fs process has really finished with the image.
#   $1 - marker file path
#   $2 - progress message printed after every 10s wait
wait_for_release() {
  local marker=$1
  local message=$2

  while [[ -f "$marker" ]]; do
    sleep 10
    echo "$message"
  done
}

rm -f container
rm -fr container_mount
rm -fr mount

mkdir -p container_mount

# create a container fs that can hold a 5T sparse file
truncate -s 3G container
/sbin/mke2fs -t ext4 -O "$FEATURES" -b 4096 container

fuse2fs -o fakeroot -o inusefile=container.inuse container container_mount

mkdir -p "$ROOT"

echo "truncate $(date)"
time truncate -s 5T "${DEV}"

echo "mke2fs $(date)"
time /sbin/mkfs.ext4 -N 1000000 -O "$FEATURES" -b 1024 "$DEV"

echo "fuse2fs ${DEV} ${ROOT} $(date)"
time fuse2fs -o fakeroot -o inusefile="$DEV.inuse" "$DEV" "$ROOT"

echo "make filler $(date) - this is slow"
time fallocate -l 4294967295K "${ROOT}/filler"

echo "fusermount -u $ROOT $(date)"
time fusermount -u "$ROOT"

wait_for_release "${DEV}.inuse" "Waiting for fuse to complete"

echo "fuse2fs ${DEV} ${ROOT} $(date)"
time fuse2fs -o fakeroot -o inusefile="$DEV.inuse" "$DEV" "$ROOT"

echo "rm filler $(date) - this is slow"
time rm "${ROOT}/filler"

echo "fusermount -u $ROOT $(date)"
time fusermount -u "$ROOT"

wait_for_release "${DEV}.inuse" "Waiting for fuse to complete"

fusermount -u container_mount

wait_for_release container.inuse "Waiting for container fuse"

rm container
rmdir "$ROOT"
rmdir container_mount

exit 0
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 
e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
--- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c    2024-11-29 08:02:27.000000000 
+0000
+++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000
@@ -348,6 +348,7 @@
        unsigned long offset;
        FILE *err_fp;
        unsigned int next_generation;
+       char* inusefile;
 };
 
 #define FUSE2FS_CHECK_MAGIC(fs, ptr, num) do {if ((ptr)->magic != (num)) \
@@ -3873,6 +3874,7 @@
        FUSE2FS_OPT("no_default_opts",  no_default_opts,        1),
        FUSE2FS_OPT("norecovery",       norecovery,             1),
        FUSE2FS_OPT("offset=%lu",       offset,         0),
+       FUSE2FS_OPT("inusefile=%s",     inusefile,              0),
 
        FUSE_OPT_KEY("-V",             FUSE2FS_VERSION),
        FUSE_OPT_KEY("--version",      FUSE2FS_VERSION),
@@ -3914,6 +3916,7 @@
        "    -o offset=<bytes>      similar to mount -o offset=<bytes>, mount 
the partition starting at <bytes>\n"
        "    -o norecovery          don't replay the journal (implies ro)\n"
        "    -o fuse2fs_debug       enable fuse2fs debugging\n"
+       "    -o inusefile=<file>    file to show that fuse is still using the 
file system image\n"
        "\n",
                        outargs->argv[0]);
                if (key == FUSE2FS_HELPFULL) {
@@ -3987,6 +3990,24 @@
                fctx.alloc_all_blocks = 1;
        }
 
+       if(fctx.inusefile) {
+               FILE* inusefile=fopen(fctx.inusefile, "w");
+               if(!inusefile) {
+                       fprintf(stderr, "Requested inusefile=%s but couldn't 
open the file for writing\n", fctx.inusefile);
+                       exit(1);
+               }
+               fclose(inusefile);
+               char* resolved = realpath(fctx.inusefile, NULL);
+               if (!resolved) {
+                       perror("realpath");
+                       fprintf(stderr, "Could not resolve realpath for 
inusefile=%s\n", fctx.inusefile);
+                       unlink(fctx.inusefile);
+                       exit(1);
+               }
+               free(fctx.inusefile);
+               fctx.inusefile = resolved;
+       }
+
        /* Start up the fs (while we still can use stdout) */
        ret = 2;
        if (!fctx.ro)
@@ -4107,6 +4128,11 @@
                        com_err(argv[0], err, "while closing fs");
                global_fs = NULL;
        }
+       /* Drop the in-use marker.  com_err() wants the error code (errno)
+        * as its second argument, ahead of the format string. */
+       if (fctx.inusefile && unlink(fctx.inusefile))
+               com_err(argv[0], errno, "while unlinking '%s'",
+                       fctx.inusefile);
        return ret;
 }
 
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 
e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
--- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c    2024-11-29 08:02:27.000000000 
+0000
+++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000
@@ -2040,6 +2040,147 @@
        return ret;
 }
 
+struct block_context {
+       e2_blkcnt_t next_block; /* blockcnt expected next if there is no hole */
+       off_t blksize;          /* block size in bytes (fs->blocksize) */
+       off_t offset;           /* offset the caller is seeking from */
+       off_t pos;              /* byte position the scan has reached */
+       off_t next_hole;        /* SEEK_HOLE candidate; starts at i_size */
+       off_t next_data;        /* SEEK_DATA candidate; starts at i_size */
+};
+
+/*
+ * Block iterator callback used by op_lseek().  Accounts for the hole (if
+ * any) between the previous block and logical block blockcnt, then for the
+ * block itself, updating the hole/data candidates in the block_context
+ * passed as privdata.  Expects calls in increasing blockcnt order.
+ * Always returns 0.
+ */
+static int
+dumponeblock(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt,
+      blk64_t ref_block, int ref_offset, void * privdata)
+{
+       struct block_context *p = (struct block_context *)privdata;
+       e2_blkcnt_t holesize = blockcnt - p->next_block;
+
+       /* Only a real gap is a hole; adjacent blocks must not set next_hole. */
+       if (holesize > 0) {
+               if (p->pos <= p->offset &&
+                   p->pos + holesize * p->blksize > p->offset)
+                       p->next_hole = p->offset; /* offset is in this hole */
+               else if (p->pos > p->offset && p->pos < p->next_hole)
+                       p->next_hole = p->pos; /* first hole after offset */
+               p->pos += p->blksize * holesize;
+       }
+
+       /* A data block */
+       p->next_block = blockcnt + 1;
+       if (p->pos <= p->offset && p->pos + p->blksize > p->offset)
+               p->next_data = p->offset; /* offset is in this data block */
+       else if (p->pos > p->offset && p->pos < p->next_data)
+               p->next_data = p->pos; /* first data block after offset */
+       p->pos += p->blksize;
+       return 0;
+}
+
+
+/*
+ * Implement lseek(SEEK_DATA / SEEK_HOLE) for an open file.
+ * Walks the file's mapped blocks in logical order, feeding each one to
+ * dumponeblock(), and returns the first data (SEEK_DATA) or first hole
+ * (any other whence) at or after offset.  The end of file counts as a hole.
+ * Returns -ENXIO if offset is at or past EOF or SEEK_DATA finds no data.
+ * NOTE(review): no lock is taken here -- confirm whether fuse2fs handlers
+ * are expected to serialise on a mutex while touching the filesystem.
+ */
+static off_t op_lseek(const char* path, off_t offset, int whence,
+                     struct fuse_file_info *fp)
+{
+       struct fuse_context *ctxt = fuse_get_context();
+       struct fuse2fs *ff = (struct fuse2fs *)ctxt->private_data;
+       struct fuse2fs_file_handle *fh =
+               (struct fuse2fs_file_handle *)(uintptr_t)fp->fh;
+       ext2_filsys fs;
+       struct ext2_inode_large inode;
+       __u64 i_size;
+       errcode_t err;
+
+       FUSE2FS_CHECK_CONTEXT(ff);
+       fs = ff->fs;
+       FUSE2FS_CHECK_MAGIC(fs, fh, FUSE2FS_FILE_MAGIC);
+
+       memset(&inode, 0, sizeof(inode));
+       err = ext2fs_read_inode_full(fs, fh->ino, (struct ext2_inode *)&inode,
+                                    sizeof(inode));
+       if (err)
+               return translate_error(fs, fh->ino, err);
+       i_size = EXT2_I_SIZE(&inode);
+
+       if (offset < 0 || (__u64)offset >= i_size)
+               return -ENXIO;
+
+       struct block_context bc = {
+               .next_block = 0,
+               .blksize = fs->blocksize,
+               .offset = offset,
+               .pos = 0,
+               .next_hole = i_size,
+               .next_data = i_size,
+       };
+
+       if (S_ISREG(inode.i_mode) && (inode.i_flags & EXT4_EXTENTS_FL)) {
+               ext2_extent_handle_t handle = NULL;
+               struct ext2fs_extent extent;
+               int op = EXT2_EXTENT_ROOT;
+
+               err = ext2fs_extent_open(fs, fh->ino, &handle);
+               if (err)
+                       return translate_error(fs, fh->ino, err);
+               while (1) {
+                       err = ext2fs_extent_get(handle, op, &extent);
+                       if (err == EXT2_ET_EXTENT_NO_NEXT)
+                               break;
+                       if (err) {
+                               ext2fs_extent_free(handle);
+                               return translate_error(fs, fh->ino, err);
+                       }
+                       op = EXT2_EXTENT_NEXT;
+
+                       /* Only first visits of leaf extents map file data. */
+                       if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
+                               continue;
+                       if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF))
+                               continue;
+
+                       /* TODO: handle a whole extent in one step. */
+                       blk64_t start = extent.e_pblk;
+                       e2_blkcnt_t blockcnt = extent.e_lblk;
+                       for (blk64_t blocknr = start;
+                            blocknr < start + extent.e_len;
+                            ++blocknr, ++blockcnt)
+                               dumponeblock(fs, &blocknr, blockcnt, 0, 0, &bc);
+               }
+               ext2fs_extent_free(handle);
+       } else if (S_ISREG(inode.i_mode) &&
+                  (inode.i_flags & EXT4_INLINE_DATA_FL)) {
+               /* Inline data has no holes except the one at EOF. */
+               return whence == SEEK_DATA ? offset : (off_t)i_size;
+       } else {
+               err = ext2fs_block_iterate3(fs, fh->ino, BLOCK_FLAG_DATA_ONLY,
+                                           NULL, dumponeblock, &bc);
+               if (err)
+                       return translate_error(fs, fh->ino, err);
+       }
+
+       /* deal with holes at the end of the inode */
+       if (i_size > bc.pos && bc.next_hole == i_size)
+               bc.next_hole = bc.pos > bc.offset ? bc.pos : bc.offset;
+
+       if (whence == SEEK_DATA)
+               return bc.next_data == i_size ? -ENXIO : bc.next_data;
+       return bc.next_hole;
+}
+
 static int op_truncate(const char *path, off_t len
 #if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0)
                        , struct fuse_file_info *fi EXT2FS_ATTR((unused))
@@ -3695,6 +3836,7 @@
        .fallocate = op_fallocate,
 # endif
 #endif
+       .lseek = op_lseek,
 };
 
 static int get_random_bytes(void *p, size_t sz)

Reply via email to