Am 13.11.2013 um 07:27 schrieb Peter Lieven <[email protected]>:
>
> Am 12.11.2013 um 16:49 schrieb Paolo Bonzini <[email protected]>:
>
>> Writing zeroes to a file can be done by punching a hole if MAY_UNMAP
>> is set.
>>
>> Note that in this case handle_aiocb_discard's ENOTSUP return code
>> is not ignored, but makes the block layer fall back to the generic
>> implementation.
>>
>> Signed-off-by: Paolo Bonzini <[email protected]>
>> ---
>> block/raw-posix.c | 64
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> trace-events | 1 +
>> 2 files changed, 64 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/raw-posix.c b/block/raw-posix.c
>> index 27fe47d..830e109 100644
>> --- a/block/raw-posix.c
>> +++ b/block/raw-posix.c
>> @@ -142,6 +142,7 @@ typedef struct BDRVRawState {
>> bool is_xfs : 1;
>> #endif
>> bool has_discard : 1;
>> + bool discard_zeroes : 1;
>> } BDRVRawState;
>>
>> typedef struct BDRVRawReopenState {
>> @@ -283,6 +284,7 @@ static int raw_open_common(BlockDriverState *bs, QDict
>> *options,
>> Error *local_err = NULL;
>> const char *filename;
>> int fd, ret;
>> + struct stat st;
>>
>> opts = qemu_opts_create_nofail(&raw_runtime_opts);
>> qemu_opts_absorb_qdict(opts, options, &local_err);
>> @@ -325,6 +327,15 @@ static int raw_open_common(BlockDriverState *bs, QDict
>> *options,
>> #endif
>>
>> s->has_discard = true;
>> +
>> + if (fstat(s->fd, &st) < 0) {
>> + error_setg_errno(errp, errno, "Could not stat file");
>> + goto fail;
>> + }
>> + if (S_ISREG(st.st_mode)) {
>> + s->discard_zeroes = true;
>> + }
>> +
>> #ifdef CONFIG_XFS
>> if (platform_test_xfs_fd(s->fd)) {
>> s->is_xfs = true;
>> @@ -788,6 +799,29 @@ static int aio_worker(void *arg)
>> return ret;
>> }
>>
>> +static int paio_submit_co(BlockDriverState *bs, int fd,
>> + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>> + int type)
>> +{
>> + RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
>> + ThreadPool *pool;
>> +
>> + acb->bs = bs;
>> + acb->aio_type = type;
>> + acb->aio_fildes = fd;
>> +
>> + if (qiov) {
>> + acb->aio_iov = qiov->iov;
>> + acb->aio_niov = qiov->niov;
>> + }
>> + acb->aio_nbytes = nb_sectors * 512;
>> + acb->aio_offset = sector_num * 512;
>> +
>> + trace_paio_submit_co(sector_num, nb_sectors, type);
>> + pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
>> + return thread_pool_submit_co(pool, aio_worker, acb);
>> +}
>> +
>> static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
>> int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>> BlockDriverCompletionFunc *cb, void *opaque, int type)
>> @@ -1200,6 +1234,31 @@ static coroutine_fn BlockDriverAIOCB
>> *raw_aio_discard(BlockDriverState *bs,
>> cb, opaque, QEMU_AIO_DISCARD);
>> }
>>
>> +static int coroutine_fn raw_co_write_zeroes(
>> + BlockDriverState *bs, int64_t sector_num,
>> + int nb_sectors, BdrvRequestFlags flags)
>> +{
>> + BDRVRawState *s = bs->opaque;
>> +
>> + if (!(flags & BDRV_REQ_MAY_UNMAP)) {
>> + return -ENOTSUP;
>> + }
>> + if (!s->discard_zeroes) {
>> + return -ENOTSUP;
>> + }
>> + return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
>> + QEMU_AIO_DISCARD);
>> +}
>> +
>> +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
>> +{
>> + BDRVRawState *s = bs->opaque;
>> +
>> + bdi->unallocated_blocks_are_zero = s->discard_zeroes;
>> + bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
>
> Does the BLKDISCARDZEROES ioctl guarantee that a device is
> zero-initialized, or does it only guarantee that a discard will not
> fail and that the discarded range reads as zeroes afterwards?
Please ignore this question. We are talking about a regular file here, not a block device.
Peter
>
> Peter
>
>> + return 0;
>> +}
>> +
>> static QEMUOptionParameter raw_create_options[] = {
>> {
>> .name = BLOCK_OPT_SIZE,
>> @@ -1223,6 +1282,7 @@ static BlockDriver bdrv_file = {
>> .bdrv_create = raw_create,
>> .bdrv_has_zero_init = bdrv_has_zero_init_1,
>> .bdrv_co_get_block_status = raw_co_get_block_status,
>> + .bdrv_co_write_zeroes = raw_co_write_zeroes,
>>
>> .bdrv_aio_readv = raw_aio_readv,
>> .bdrv_aio_writev = raw_aio_writev,
>> @@ -1231,6 +1291,7 @@ static BlockDriver bdrv_file = {
>>
>> .bdrv_truncate = raw_truncate,
>> .bdrv_getlength = raw_getlength,
>> + .bdrv_get_info = raw_get_info,
>> .bdrv_get_allocated_file_size
>> = raw_get_allocated_file_size,
>>
>> @@ -1586,6 +1647,7 @@ static BlockDriver bdrv_host_device = {
>>
>> .bdrv_truncate = raw_truncate,
>> .bdrv_getlength = raw_getlength,
>> + .bdrv_get_info = raw_get_info,
>> .bdrv_get_allocated_file_size
>> = raw_get_allocated_file_size,
>>
>> @@ -1715,7 +1777,7 @@ static BlockDriver bdrv_host_floppy = {
>> .bdrv_aio_flush = raw_aio_flush,
>>
>> .bdrv_truncate = raw_truncate,
>> - .bdrv_getlength = raw_getlength,
>> + .bdrv_getlength = raw_getlength,
>> .has_variable_length = true,
>> .bdrv_get_allocated_file_size
>> = raw_get_allocated_file_size,
>> diff --git a/trace-events b/trace-events
>> index 96b3974..995c84a 100644
>> --- a/trace-events
>> +++ b/trace-events
>> @@ -128,6 +128,7 @@ thread_pool_cancel(void *req, void *opaque) "req %p
>> opaque %p"
>>
>> # block/raw-win32.c
>> # block/raw-posix.c
>> +paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num
>> %"PRId64" nb_sectors %d type %d"
>> paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int
>> type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
>>
>> # ioport.c
>> --
>> 1.8.4.2
>>
>>
>