Re: [PATCH v2 03/20] fuse: Implement standard FUSE operations

Max Reitz Thu, 15 Oct 2020 08:22:12 -0700

On 15.10.20 11:46, Kevin Wolf wrote:
> Am 22.09.2020 um 12:49 hat Max Reitz geschrieben:
>> This makes the export actually useful instead of only producing errors
>> whenever it is accessed.
>>
>> Signed-off-by: Max Reitz <[email protected]>
>> ---
>>  block/export/fuse.c | 226 ++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 226 insertions(+)
>>
>> diff --git a/block/export/fuse.c b/block/export/fuse.c
>> index 75f11d2514..8fc667231d 100644
>> --- a/block/export/fuse.c
>> +++ b/block/export/fuse.c
>> @@ -32,6 +32,10 @@
>>  #include <fuse_lowlevel.h>
>>  
>>  
>> +/* Prevent overly long bounce buffer allocations */
>> +#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 
>> 1024))
>> +
>> +
>>  typedef struct FuseExport {
>>      BlockExport common;
>>  
>> @@ -241,7 +245,229 @@ static bool is_regular_file(const char *path, Error 
>> **errp)
>>      return true;
>>  }
>>  
>> +
>> +/**
>> + * Let clients look up files.  Always return ENOENT because we only
>> + * care about the mountpoint itself.
>> + */
>> +static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
>> +{
>> +    fuse_reply_err(req, ENOENT);
>> +}
>> +
>> +/**
>> + * Let clients get file attributes (i.e., stat() the file).
>> + */
>> +static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
>> +                         struct fuse_file_info *fi)
>> +{
>> +    struct stat statbuf;
>> +    int64_t length, allocated_blocks;
>> +    time_t now = time(NULL);
>> +    ImageInfo *info = NULL;
>> +    FuseExport *exp = fuse_req_userdata(req);
>> +    mode_t mode;
>> +    Error *local_error = NULL;
>> +
>> +    length = blk_getlength(exp->common.blk);
>> +    if (length < 0) {
>> +        fuse_reply_err(req, -length);
>> +        return;
>> +    }
>> +
>> +    bdrv_query_image_info(blk_bs(exp->common.blk), &info, &local_error);
>> +    if (local_error) {
>> +        allocated_blocks = DIV_ROUND_UP(length, 512);
>> +    } else {
>> +        allocated_blocks = DIV_ROUND_UP(info->actual_size, 512);
>> +    }
>> +
>> +    qapi_free_ImageInfo(info);
>> +    error_free(local_error);
>> +    local_error = NULL;
> 
> If you only use info->actual_size, why not directly call
> bdrv_get_allocated_file_size()?


🤔

Sometimes one doesn’t think of the simplest things.

>> +
>> +    mode = S_IFREG | S_IRUSR;
>> +    if (exp->writable) {
>> +        mode |= S_IWUSR;
>> +    }
>> +
>> +    statbuf = (struct stat) {
>> +        .st_ino     = inode,
>> +        .st_mode    = mode,
>> +        .st_nlink   = 1,
>> +        .st_uid     = getuid(),
>> +        .st_gid     = getgid(),
>> +        .st_size    = length,
>> +        .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
>> +        .st_blocks  = allocated_blocks,
>> +        .st_atime   = now,
>> +        .st_mtime   = now,
>> +        .st_ctime   = now,
>> +    };
>> +
>> +    fuse_reply_attr(req, &statbuf, 1.);
>> +}
>> +
>> +static int fuse_do_truncate(const FuseExport *exp, int64_t size,
>> +                            PreallocMode prealloc)
>> +{
>> +    uint64_t blk_perm, blk_shared_perm;
>> +    int ret;
>> +
>> +    blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
>> +
>> +    ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
>> +                       blk_shared_perm, NULL);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    ret = blk_truncate(exp->common.blk, size, true, prealloc, 0, NULL);
>> +
>> +    /* Must succeed, because we are only giving up the RESIZE permission */
>> +    blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
>> +
>> +    return ret;
>> +}
>> +
>> +/**
>> + * Let clients set file attributes.  Only resizing is supported.
>> + */
>> +static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat 
>> *statbuf,
>> +                         int to_set, struct fuse_file_info *fi)
>> +{
>> +    FuseExport *exp = fuse_req_userdata(req);
>> +    int ret;
>> +
>> +    if (!exp->writable) {
>> +        fuse_reply_err(req, EACCES);
>> +        return;
>> +    }
>> +
>> +    if (to_set & ~FUSE_SET_ATTR_SIZE) {
>> +        fuse_reply_err(req, ENOTSUP);
>> +        return;
>> +    }
>> +
>> +    ret = fuse_do_truncate(exp, statbuf->st_size, PREALLOC_MODE_OFF);
>> +    if (ret < 0) {
>> +        fuse_reply_err(req, -ret);
>> +        return;
>> +    }
>> +
>> +    fuse_getattr(req, inode, fi);
>> +}
>> +
>> +/**
>> + * Let clients open a file (i.e., the exported image).
>> + */
>> +static void fuse_open(fuse_req_t req, fuse_ino_t inode,
>> +                      struct fuse_file_info *fi)
>> +{
>> +    fuse_reply_open(req, fi);
>> +}
>> +
>> +/**
>> + * Handle client reads from the exported image.
>> + */
>> +static void fuse_read(fuse_req_t req, fuse_ino_t inode,
>> +                      size_t size, off_t offset, struct fuse_file_info *fi)
>> +{
>> +    FuseExport *exp = fuse_req_userdata(req);
>> +    int64_t length;
>> +    void *buf;
>> +    int ret;
>> +
>> +    /**
>> +     * Clients will expect short reads at EOF, so we have to limit
>> +     * offset+size to the image length.
>> +     */
>> +    length = blk_getlength(exp->common.blk);
>> +    if (length < 0) {
>> +        fuse_reply_err(req, -length);
>> +        return;
>> +    }
>> +
>> +    size = MIN(size, FUSE_MAX_BOUNCE_BYTES);
> 
> "Read should send exactly the number of bytes requested except on EOF or
> error, otherwise the rest of the data will be substituted with zeroes."

:(

> Do we corrupt huge reads with this, so that read() succeeds, but the
> buffer is zeroed above 64M instead of containing the correct data? Maybe
> we should return an error instead?

Hm.  It looks like there is a max_read option obeyed by the kernel
driver, and it appears it’s set by implementing .init() and setting
fuse_conn_info.max_read (also, “for the time being” it has to also set
in the mount options passed to fuse_session_new()).

I’m not sure whether that does anything, though.  It appears that
whenever I do a cached read, it caps out at 128k (which is what
cuse_prep_data() in libfuse sets; though increasing that number there
doesn’t change anything, so I think that’s just a coincidence), and with
O_DIRECT, it caps out at 1M.

But at least that would be grounds to return an error for >64M requests.
 (Limiting max_read to 64k does limit all read requests to 64k.)

Further investigation is needed.

> (It's kind of sad that we need a bounce buffer from which data is later
> copied instead of being provided the right memory by the kernel.)

Yes, it is.

>> +    if (offset + size > length) {
>> +        size = length - offset;
>> +    }
>> +
>> +    buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
>> +    if (!buf) {
>> +        fuse_reply_err(req, ENOMEM);
>> +        return;
>> +    }
>> +
>> +    ret = blk_pread(exp->common.blk, offset, buf, size);
>> +    if (ret >= 0) {
>> +        fuse_reply_buf(req, buf, size);
>> +    } else {
>> +        fuse_reply_err(req, -ret);
>> +    }
>> +
>> +    qemu_vfree(buf);
>> +}
>> +
>> +/**
>> + * Handle client writes to the exported image.
>> + */
>> +static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
>> +                       size_t size, off_t offset, struct fuse_file_info *fi)
>> +{
>> +    FuseExport *exp = fuse_req_userdata(req);
>> +    int64_t length;
>> +    int ret;
>> +
>> +    if (!exp->writable) {
>> +        fuse_reply_err(req, EACCES);
>> +        return;
>> +    }
>> +
>> +    /**
>> +     * Clients will expect short writes at EOF, so we have to limit
>> +     * offset+size to the image length.
>> +     */
>> +    length = blk_getlength(exp->common.blk);
>> +    if (length < 0) {
>> +        fuse_reply_err(req, -length);
>> +        return;
>> +    }
>> +
>> +    size = MIN(size, BDRV_REQUEST_MAX_BYTES);
> 
> We're only supposed to do short writes on EOF, so this has a similar
> problem as in fuse_read, except that BDRV_REQUEST_MAX_BYTES is much
> higher and it's not specified what the resulting misbehaviour could be
> (possibly the kernel not retrying write for the rest of the buffer?)

As for reading above, we can (and should) probably set max_write.

>> +    if (offset + size > length) {
>> +        size = length - offset;
>> +    }
>> +
>> +    ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
>> +    if (ret >= 0) {
>> +        fuse_reply_write(req, size);
>> +    } else {
>> +        fuse_reply_err(req, -ret);
>> +    }
>> +}
>> +
>> +/**
>> + * Let clients flush the exported image.
>> + */
>> +static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
>> +                       struct fuse_file_info *fi)
>> +{
>> +    FuseExport *exp = fuse_req_userdata(req);
>> +    int ret;
>> +
>> +    ret = blk_flush(exp->common.blk);
>> +    fuse_reply_err(req, ret < 0 ? -ret : 0);
>> +}
> 
> This seems to be an implementation for .fsync rather than for .flush.

Wouldn’t fsync entail a drain?

Or is it the opposite, flush should just drain and not invoke
blk_flush()?  (Sorry, this all gets me confused every time.)

(Though I do think .fsync should both flush and drain.)

> Hmm, or maybe actually for both? We usually do bdrv_flush() during
> close, so it would be consistent to do the same here. It's our last
> chance to report an error to the user before the file is closed.

I don’t understand what you mean.  What is “the same”?  Closing the
image?  Or indeed having .flush() be implemented with blk_flush()?

Do you mean that other parties may do the same as qemu does, i.e. flush
files before they are closed, which is why we should anticipate the same
and give users a chance to see errors?

Max

signature.asc
Description: OpenPGP digital signature

Re: [PATCH v2 03/20] fuse: Implement standard FUSE operations

Reply via email to