On Mon, Jun 05, 2023 at 06:41:07PM +0800, Sam Li wrote:
> By adding zone operations and zoned metadata, the zoned emulation
> capability enables full emulation support of zoned device using
> a qcow2 file. The zoned device metadata includes zone type,
> zoned device state and write pointer of each zone, which is stored
> to an array of unsigned integers.
>
> Each zone of a zoned device makes state transitions following
> the zone state machine. The zone state machine mainly describes
> five states, IMPLICIT OPEN, EXPLICIT OPEN, FULL, EMPTY and CLOSED.
> READ ONLY and OFFLINE states will generally be affected by device
> internal events. The operations on zones cause corresponding state
> changing.
>
> Zoned devices have a limit on zone resources, which puts constraints on
> write operations into zones.
>
> Signed-off-by: Sam Li <[email protected]>
> ---
> block/qcow2.c | 629 +++++++++++++++++++++++++++++++++++++++++++++++++-
> block/qcow2.h | 2 +
> 2 files changed, 629 insertions(+), 2 deletions(-)
>
> diff --git a/block/qcow2.c b/block/qcow2.c
> index b886dab42b..f030965d5d 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -194,6 +194,164 @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char
> *fmt, Error **errp)
> return cryptoopts_qdict;
> }
>
> +#define QCOW2_ZT_IS_CONV(wp) (wp & 1ULL << 59)
> +
> +static inline int qcow2_get_wp(uint64_t wp)
> +{
> + /* clear state and type information */
> + return ((wp << 5) >> 5);
> +}
> +
> +static inline int qcow2_get_zs(uint64_t wp)
> +{
> + return (wp >> 60);
> +}
> +
> +static inline void qcow2_set_wp(uint64_t *wp, BlockZoneState zs)
> +{
> + uint64_t addr = qcow2_get_wp(*wp);
> + addr |= ((uint64_t)zs << 60);
> + *wp = addr;
> +}
> +
> +/*
> + * File wp tracking: reset zone, finish zone and append zone can
> + * change the value of write pointer. All zone operations will change
> + * the state of that/those zone.
> + * */
> +static inline void qcow2_wp_tracking_helper(int index, uint64_t wp) {
> + /* format: operations, the wp. */
> + printf("wps[%d]: 0x%x\n", index, qcow2_get_wp(wp)>>BDRV_SECTOR_BITS);
> +}
> +
> +/*
> + * Perform a state assignment and a flush operation that writes the new wp
> + * value to the dedicated location of the disk file.
> + */
> +static int qcow2_write_wp_at(BlockDriverState *bs, uint64_t *wp,
> + uint32_t index, BlockZoneState zs) {
> + BDRVQcow2State *s = bs->opaque;
> + int ret;
> +
> + qcow2_set_wp(wp, zs);
> + ret = bdrv_pwrite(bs->file, s->zoned_header.zonedmeta_offset
> + + sizeof(uint64_t) * index, sizeof(uint64_t), wp, 0);
> +
> + if (ret < 0) {
> + goto exit;
> + }
> + qcow2_wp_tracking_helper(index, *wp);
> + return ret;
> +
> +exit:
> + error_report("Failed to write metadata with file");
> + return ret;
> +}
> +
> +static int qcow2_check_active(BlockDriverState *bs)
> +{
> + BDRVQcow2State *s = bs->opaque;
> +
> + if (!s->zoned_header.max_active_zones) {
> + return 0;
> + }
> +
> + if (s->nr_zones_exp_open + s->nr_zones_imp_open + s->nr_zones_closed
> + < s->zoned_header.max_active_zones) {
> + return 0;
> + }
> +
> + return -1;
> +}
> +
> +static int qcow2_check_open(BlockDriverState *bs)
> +{
> + BDRVQcow2State *s = bs->opaque;
> + int ret;
> +
> + if (!s->zoned_header.max_open_zones) {
> + return 0;
> + }
> +
> + if (s->nr_zones_exp_open + s->nr_zones_imp_open
> + < s->zoned_header.max_open_zones) {
> + return 0;
> + }
> +
> + if(s->nr_zones_imp_open) {
> + ret = qcow2_check_active(bs);
> + if (ret == 0) {
> + /* TODO: it takes O(n) time complexity (n = nr_zones).
> + * Optimizations required. */
> + /* close one implicitly open zones to make it available */
> + for (int i = s->zoned_header.zone_nr_conv;
> + i < bs->bl.nr_zones; ++i) {
> + uint64_t *wp = &s->wps->wp[i];
> + if (qcow2_get_zs(*wp) == BLK_ZS_IOPEN) {
> + ret = qcow2_write_wp_at(bs, wp, i, BLK_ZS_CLOSED);
> + if (ret < 0) {
> + return ret;
> + }
> + s->wps->wp[i] = *wp;
> + s->nr_zones_imp_open--;
> + s->nr_zones_closed++;
> + break;
> + }
> + }
> + return 0;
> + }
> + return ret;
> + }
> +
> + return -1;
> +}
> +
> +/*
> + * The zoned device has limited zone resources of open, closed, active
> + * zones.
> + */
> +static int qcow2_check_zone_resources(BlockDriverState *bs,
> + BlockZoneState zs)
> +{
> + int ret;
> +
> + switch (zs) {
> + case BLK_ZS_EMPTY:
> + ret = qcow2_check_active(bs);
> + if (ret < 0) {
> + error_report("No enough active zones");
> + return ret;
> + }
> + return ret;
> + case BLK_ZS_CLOSED:
> + ret = qcow2_check_open(bs);
> + if (ret < 0) {
> + error_report("No enough open zones");
> + return ret;
> + }
> + return ret;
> + default:
> + return -EINVAL;
> + }
> +
> +}
> +
> +static inline int qcow2_refresh_zonedmeta(BlockDriverState *bs)
> +{
> + int ret;
> + BDRVQcow2State *s = bs->opaque;
> + uint64_t *temp = g_malloc(s->zoned_header.zonedmeta_size);
> + ret = bdrv_pread(bs->file, s->zoned_header.zonedmeta_offset,
> + s->zoned_header.zonedmeta_size, temp, 0);
> + if (ret < 0) {
> + error_report("Can not read metadata\n");
> + return ret;
> + }
> +
> + memcpy(s->wps->wp, temp, s->zoned_header.zonedmeta_size);
> + return 0;
> +}
> +
> /*
> * read qcow2 extension and fill bs
> * start reading from start_offset
> @@ -455,7 +613,19 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t
> start_offset,
> be32_to_cpu(zoned_ext.max_active_zones);
> zoned_ext.max_append_sectors =
> be32_to_cpu(zoned_ext.max_append_sectors);
> + zoned_ext.zonedmeta_offset =
> + be64_to_cpu(zoned_ext.zonedmeta_offset);
> + zoned_ext.zonedmeta_size = be64_to_cpu(zoned_ext.zonedmeta_size);
> s->zoned_header = zoned_ext;
> + s->wps = g_malloc(sizeof(BlockZoneWps)
> + + s->zoned_header.zonedmeta_size);
> + ret = qcow2_refresh_zonedmeta(bs);
> + if (ret < 0) {
> + error_setg_errno(errp, -ret, "zonedmeta: "
> + "Could not update zoned meta");
> + return ret;
> + }
> + qemu_co_mutex_init(&s->wps->colock);
>
> #ifdef DEBUG_EXT
> printf("Qcow2: Got zoned format extension: "
> @@ -1982,6 +2152,14 @@ static void qcow2_refresh_limits(BlockDriverState *bs,
> Error **errp)
> }
> bs->bl.pwrite_zeroes_alignment = s->subcluster_size;
> bs->bl.pdiscard_alignment = s->cluster_size;
> + bs->bl.zoned = s->zoned_header.zoned;
> + bs->bl.nr_zones = s->zoned_header.nr_zones;
> + bs->wps = s->wps;
> + bs->bl.max_append_sectors = s->zoned_header.max_append_sectors;
> + bs->bl.max_active_zones = s->zoned_header.max_active_zones;
> + bs->bl.max_open_zones = s->zoned_header.max_open_zones;
> + bs->bl.zone_size = s->zoned_header.zone_size;
> + bs->bl.write_granularity = BDRV_SECTOR_SIZE;
> }
>
> static int qcow2_reopen_prepare(BDRVReopenState *state,
> @@ -2672,9 +2850,26 @@ qcow2_co_pwritev_part(BlockDriverState *bs, int64_t
> offset, int64_t bytes,
> uint64_t host_offset;
> QCowL2Meta *l2meta = NULL;
> AioTaskPool *aio = NULL;
> + int64_t start_offset, start_bytes;
> + BlockZoneState zs;
> + int64_t end;
> + uint64_t *wp;
> + int64_t zone_size = bs->bl.zone_size;
> + int index;
>
> trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
>
> + start_offset = offset;
> + start_bytes = bytes;
> + /* The offset should not less than the wp of that
> + * zone where offset starts. */
> + if (zone_size) {
> + index = start_offset / zone_size;
> + wp = &s->wps->wp[index];
> + if (offset < qcow2_get_wp(*wp)) {
> + return -EINVAL;
> + }
> + }
> while (bytes != 0 && aio_task_pool_status(aio) == 0) {
>
> l2meta = NULL;
> @@ -2720,6 +2915,47 @@ qcow2_co_pwritev_part(BlockDriverState *bs, int64_t
> offset, int64_t bytes,
> qiov_offset += cur_bytes;
> trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
> }
> +
> + if (zone_size) {
> + index = start_offset / zone_size;
> + wp = &s->wps->wp[index];
> + uint64_t wpv = *wp;
> + if (!QCOW2_ZT_IS_CONV(wpv)) {
> + /*
> + * Implicitly open one closed zone to write if there are zone
> resources
> + * left.
> + */
> + zs = qcow2_get_zs(wpv);
> + if (zs == BLK_ZS_CLOSED || zs == BLK_ZS_EMPTY) {
> + ret = qcow2_check_zone_resources(bs, zs);
> + if (ret < 0) {
> + goto fail_nometa;
> + }
> +
> + if (zs == BLK_ZS_CLOSED) {
> + s->nr_zones_closed--;
> + s->nr_zones_imp_open++;
> + } else {
> + s->nr_zones_imp_open++;
> + }
> + }
> +
> + /* align up (start_offset, zone_size), the start offset is not
> + * necessarily power of two. */
> + end = ((start_offset + zone_size) / zone_size) * zone_size;
> + if (start_offset + start_bytes <= end) {
> + *wp = start_offset + start_bytes;
> + } else {
> + ret = -EINVAL;
> + goto fail_nometa;
> + }
> +
> + ret = qcow2_write_wp_at(bs, wp, index,BLK_ZS_IOPEN);
> + if (ret < 0) {
> + goto fail_nometa;
> + }
> + }
> + }
> ret = 0;
>
> qemu_co_mutex_lock(&s->lock);
> @@ -3117,7 +3353,9 @@ int qcow2_update_header(BlockDriverState *bs)
> .max_active_zones =
> cpu_to_be32(s->zoned_header.max_active_zones),
> .max_append_sectors =
> - cpu_to_be32(s->zoned_header.max_append_sectors)
> + cpu_to_be32(s->zoned_header.max_append_sectors),
> + .zonedmeta_offset =
> cpu_to_be64(s->zoned_header.zonedmeta_offset),
> + .zonedmeta_size = cpu_to_be64(s->zoned_header.zonedmeta_size)
> };
> ret = header_ext_add(buf, QCOW2_EXT_MAGIC_ZONED_FORMAT,
> &zoned_header, sizeof(zoned_header),
> @@ -3522,7 +3760,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options,
> Error **errp)
> int version;
> int refcount_order;
> uint64_t *refcount_table;
> - int ret;
> + uint64_t zoned_meta_size, zoned_clusterlen;
> + int ret, offset, i;
> uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
>
> assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
> @@ -3823,6 +4062,48 @@ qcow2_co_create(BlockdevCreateOptions *create_options,
> Error **errp)
> s->zoned_header.max_open_zones = qcow2_opts->max_open_zones;
> s->zoned_header.max_active_zones = qcow2_opts->max_active_zones;
> s->zoned_header.max_append_sectors = qcow2_opts->max_append_sectors;
> + s->zoned_header.nr_zones = qcow2_opts->size / qcow2_opts->zone_size;
> +
> + zoned_meta_size = sizeof(uint64_t) * s->zoned_header.nr_zones;
> + uint64_t meta[zoned_meta_size];
zoned_meta_size is in bytes but the array elements are uint64_t, so this
declares an array eight times larger than intended. I guess the array
length should be s->zoned_header.nr_zones (a zone count) instead of
zoned_meta_size (a byte count).
Please use g_autoptr and g_new() for this to avoid stack overflow issues
if nr_zones is large.
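
For example (just a sketch; I used g_autofree below because the buffer is
a plain uint64_t array, while g_autoptr would need a typed cleanup
function):

  zoned_meta_size = sizeof(uint64_t) * s->zoned_header.nr_zones;
  /* one uint64_t per zone, heap-allocated and zero-filled */
  g_autofree uint64_t *meta = g_new0(uint64_t, s->zoned_header.nr_zones);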
> + memset(meta, 0, zoned_meta_size);
Unnecessary if you use g_new0(). Also, zeroing is probably unnecessary
since the for loops below fill in every element of the array.
> +
> + for (i = 0; i < s->zoned_header.zone_nr_conv; ++i) {
> + meta[i] = i * s->zoned_header.zone_size;
> + meta[i] += 1ULL << 59;
Bitwise OR ('|') is clearer than addition. You neither rely on nor want
the addition's arithmetic carry here.
> + }
> + for (; i < s->zoned_header.nr_zones; ++i) {
> + meta[i] = i * s->zoned_header.zone_size;
> + /* For sequential zones, the first four most significant bit
> + * indicates zone states. */
> + meta[i] += ((uint64_t)BLK_ZS_EMPTY << 60);
Bitwise OR.
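
For illustration, both initialization loops could read like this (sketch
only, same names as in the patch; the uint64_t cast also keeps the
multiplication from being evaluated in 32 bits):

  for (i = 0; i < s->zoned_header.zone_nr_conv; ++i) {
      /* conventional zone: bit 59 marks the zone type */
      meta[i] = ((uint64_t)i * s->zoned_header.zone_size) | (1ULL << 59);
  }
  for (; i < s->zoned_header.nr_zones; ++i) {
      /* sequential zone: the top four bits hold the zone state */
      meta[i] = ((uint64_t)i * s->zoned_header.zone_size) |
                ((uint64_t)BLK_ZS_EMPTY << 60);
  }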
> + }
> +
> + offset = qcow2_alloc_clusters(blk_bs(blk), zoned_meta_size);
> + if (offset < 0) {
> + error_setg_errno(errp, -offset, "Could not allocate clusters "
> + "for zoned metadata size");
> + goto out;
> + }
> + s->zoned_header.zonedmeta_offset = offset;
> + s->zoned_header.zonedmeta_size = zoned_meta_size;
> +
> + zoned_clusterlen = size_to_clusters(s, zoned_meta_size)
> + * s->cluster_size;
> + assert(qcow2_pre_write_overlap_check(bs, 0, offset,
> + zoned_clusterlen,false) == 0);
> + ret = bdrv_pwrite_zeroes(blk_bs(blk)->file, offset,
> + zoned_clusterlen, 0);
> + if (ret < 0) {
> + error_setg_errno(errp, -ret, "Could not zero fill zoned
> metadata");
> + goto out;
> + }
> + ret = bdrv_pwrite(blk_bs(blk)->file, offset, zoned_meta_size, meta,
> 0);
> + if (ret < 0) {
> + error_setg_errno(errp, -ret, "Could not write zoned metadata "
> + "to disk");
> + goto out;
> + }
> }
>
> /* Create a full header (including things like feature table) */
> @@ -4166,6 +4447,346 @@ static coroutine_fn int
> qcow2_co_pdiscard(BlockDriverState *bs,
> return ret;
> }
>
> +static int coroutine_fn
> +qcow2_co_zone_report(BlockDriverState *bs, int64_t offset,
> + unsigned int *nr_zones, BlockZoneDescriptor *zones)
> +{
> + BDRVQcow2State *s = bs->opaque;
> + uint64_t zone_size = s->zoned_header.zone_size;
> + int64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
> + int64_t size = bs->bl.nr_zones * zone_size;
> + int i = 0;
> + int si;
> +
> + if (zone_size > 0) {
> + si = offset / zone_size;
offset must be validated. It might be beyond the capacity of the device.
> + unsigned int nrz = *nr_zones;
nr_zones must be validated. It might be larger than bs->bl.nr_zones.
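
Roughly something like this before the loop (untested sketch); clamping
here would also make the late "si + i == bs->bl.nr_zones" check inside
the loop unnecessary:

  if (offset >= capacity) {
      return -EINVAL;
  }
  si = offset / zone_size;
  /* never report more zones than actually exist past offset */
  unsigned int nrz = MIN(*nr_zones, bs->bl.nr_zones - si);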
> + qemu_co_mutex_lock(&s->wps->colock);
> + for (; i < nrz; ++i) {
> + zones[i].start = (si + i) * zone_size;
> +
> + /* The last zone can be smaller than the zone size */
> + if ((si + i + 1) == bs->bl.nr_zones && size > capacity) {
> + zones[i].length = zone_size - (size - capacity);
> + } else {
> + zones[i].length = zone_size;
> + }
> + zones[i].cap = zone_size;
Should capacity also be capped for the last zone?
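
If it should be, I would expect something along these lines (sketch; the
real question is whether cap may exceed the reported zone length):

  if ((si + i + 1) == bs->bl.nr_zones && size > capacity) {
      zones[i].length = zone_size - (size - capacity);
  } else {
      zones[i].length = zone_size;
  }
  /* keep the capacity consistent with the shorter last zone */
  zones[i].cap = zones[i].length;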
> +
> + uint64_t wp = s->wps->wp[si + i];
> + if (QCOW2_ZT_IS_CONV(wp)) {
> + zones[i].type = BLK_ZT_CONV;
> + zones[i].state = BLK_ZS_NOT_WP;
> + /* Clear the zone type bit */
> + wp &= ~(1ULL << 59);
> + } else {
> + zones[i].type = BLK_ZT_SWR;
> + zones[i].state = qcow2_get_zs(wp);
> + /* Clear the zone state bits */
> + wp = qcow2_get_wp(wp);
> + }
> +
> + zones[i].wp = wp;
> + if (si + i == bs->bl.nr_zones) {
> + break;
> + }
This check comes too late because wp[] has already been indexed, and it
does not help when the first zone is already out of bounds.
> + }
> + qemu_co_mutex_unlock(&s->wps->colock);
> + }
> + *nr_zones = i;
> + return 0;
> +}
> +
> +static int qcow2_open_zone(BlockDriverState *bs, uint32_t index) {
> + BDRVQcow2State *s = bs->opaque;
> + int ret;
> +
> + qemu_co_mutex_lock(&s->wps->colock);
> + uint64_t *wp = &s->wps->wp[index];
> + BlockZoneState zs = qcow2_get_zs(*wp);
> +
> + switch(zs) {
> + case BLK_ZS_EMPTY:
> + ret = qcow2_check_zone_resources(bs, BLK_ZS_EMPTY);
> + if (ret < 0) {
> + return ret;
> + }
> + break;
> + case BLK_ZS_IOPEN:
> + s->nr_zones_imp_open--;
> + break;
> + case BLK_ZS_EOPEN:
> + return 0;
> + case BLK_ZS_CLOSED:
> + ret = qcow2_check_zone_resources(bs, BLK_ZS_CLOSED);
> + if (ret < 0) {
> + return ret;
> + }
> + s->nr_zones_closed--;
> + break;
> + case BLK_ZS_FULL:
> + break;
> + default:
> + return -EINVAL;
> + }
s->wps->colock is not unlocked in the return code paths above.
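One way to avoid leaking the lock is the pattern you already use in
qcow2_co_zone_mgmt(): route every early exit through an unlock label.
Rough sketch of qcow2_open_zone() with that change (logic otherwise as in
the patch); QEMU_LOCK_GUARD()/WITH_QEMU_LOCK_GUARD() would be an
alternative if you prefer:

  static int qcow2_open_zone(BlockDriverState *bs, uint32_t index) {
      BDRVQcow2State *s = bs->opaque;
      int ret = 0;

      qemu_co_mutex_lock(&s->wps->colock);
      uint64_t *wp = &s->wps->wp[index];
      BlockZoneState zs = qcow2_get_zs(*wp);

      switch(zs) {
      case BLK_ZS_EMPTY:
          ret = qcow2_check_zone_resources(bs, BLK_ZS_EMPTY);
          if (ret < 0) {
              goto unlock;
          }
          break;
      case BLK_ZS_IOPEN:
          s->nr_zones_imp_open--;
          break;
      case BLK_ZS_EOPEN:
          /* nothing to do, but still drop the lock */
          goto unlock;
      case BLK_ZS_CLOSED:
          ret = qcow2_check_zone_resources(bs, BLK_ZS_CLOSED);
          if (ret < 0) {
              goto unlock;
          }
          s->nr_zones_closed--;
          break;
      case BLK_ZS_FULL:
          break;
      default:
          ret = -EINVAL;
          goto unlock;
      }

      ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_EOPEN);
      if (!ret) {
          s->nr_zones_exp_open++;
      }

  unlock:
      qemu_co_mutex_unlock(&s->wps->colock);
      return ret;
  }

The same applies to qcow2_close_zone(), qcow2_finish_zone() and
qcow2_reset_zone() below.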
> + ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_EOPEN);
I wanted to confirm with you and Damien: are zone states persisted in
the zoned storage model, even OPEN/CLOSED? To me, OPEN/CLOSED seem more
related to runtime resource limits than to persistent state that needs
to be stored.
> + if (!ret) {
> + s->nr_zones_exp_open++;
> + }
> + qemu_co_mutex_unlock(&s->wps->colock);
> + return ret;
> +}
> +
> +static int qcow2_close_zone(BlockDriverState *bs, uint32_t index) {
> + BDRVQcow2State *s = bs->opaque;
> + int ret;
> +
> + qemu_co_mutex_lock(&s->wps->colock);
> + uint64_t *wp = &s->wps->wp[index];
> + BlockZoneState zs = qcow2_get_zs(*wp);
> +
> + switch(zs) {
> + case BLK_ZS_EMPTY:
> + break;
> + case BLK_ZS_IOPEN:
> + s->nr_zones_imp_open--;
> + break;
> + case BLK_ZS_EOPEN:
> + s->nr_zones_exp_open--;
> + break;
> + case BLK_ZS_CLOSED:
> + ret = qcow2_check_zone_resources(bs, BLK_ZS_CLOSED);
> + if (ret < 0) {
> + return ret;
> + }
> + s->nr_zones_closed--;
> + break;
> + case BLK_ZS_FULL:
> + break;
> + default:
> + return -EINVAL;
> + }
s->wps->colock is not unlocked in the return code paths above.
> +
> + if (zs == BLK_ZS_EMPTY) {
> + ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_EMPTY);
> + } else {
> + ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_CLOSED);
> + if (!ret) {
> + s->nr_zones_closed++;
> + }
> + }
> + qemu_co_mutex_unlock(&s->wps->colock);
> + return ret;
> +}
> +
> +static int qcow2_finish_zone(BlockDriverState *bs, uint32_t index) {
> + BDRVQcow2State *s = bs->opaque;
> + int ret;
> +
> + qemu_co_mutex_lock(&s->wps->colock);
> + uint64_t *wp = &s->wps->wp[index];
> + BlockZoneState zs = qcow2_get_zs(*wp);
> +
> + switch(zs) {
> + case BLK_ZS_EMPTY:
> + ret = qcow2_check_zone_resources(bs, BLK_ZS_EMPTY);
> + if (ret < 0) {
> + return ret;
> + }
> + break;
> + case BLK_ZS_IOPEN:
> + s->nr_zones_imp_open--;
> + break;
> + case BLK_ZS_EOPEN:
> + s->nr_zones_exp_open--;
> + break;
> + case BLK_ZS_CLOSED:
> + ret = qcow2_check_zone_resources(bs, BLK_ZS_CLOSED);
> + if (ret < 0) {
> + return ret;
> + }
> + s->nr_zones_closed--;
> + break;
> + case BLK_ZS_FULL:
> + return 0;
> + default:
> + return -EINVAL;
> + }
s->wps->colock is not unlocked in the return code paths above.
> +
> + *wp = (index + 1) * s->zoned_header.zone_size;
There is an integer overflow here. Please see my comment in
qcow2_reset_zone() below.
> + ret = qcow2_write_wp_at(bs, wp, index, BLK_ZS_FULL);
> + qemu_co_mutex_unlock(&s->wps->colock);
> + return ret;
> +}
> +
> +static int qcow2_reset_zone(BlockDriverState *bs, uint32_t index,
> + int64_t len) {
> + BDRVQcow2State *s = bs->opaque;
> + int nrz = bs->bl.nr_zones;
> + int zone_size = bs->bl.zone_size;
> + int n, ret = 0;
> +
> + qemu_co_mutex_lock(&s->wps->colock);
> + uint64_t *wp = &s->wps->wp[index];
> + if (len == bs->total_sectors << BDRV_SECTOR_BITS) {
> + n = nrz;
> + index = 0;
> + } else {
> + n = len / zone_size;
> + }
> +
> + for (int i = 0; i < n; ++i) {
> + uint64_t *wp_i = (uint64_t *)(wp + i);
> + uint64_t wpi_v = *wp_i;
> + if (QCOW2_ZT_IS_CONV(wpi_v)) {
> + continue;
> + }
> +
> + BlockZoneState zs = qcow2_get_zs(wpi_v);
> + switch (zs) {
> + case BLK_ZS_EMPTY:
> + break;
> + case BLK_ZS_IOPEN:
> + s->nr_zones_imp_open--;
> + break;
> + case BLK_ZS_EOPEN:
> + s->nr_zones_exp_open--;
> + break;
> + case BLK_ZS_CLOSED:
> + s->nr_zones_closed--;
> + break;
> + case BLK_ZS_FULL:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + if (zs == BLK_ZS_EMPTY) {
> + continue;
> + }
> +
> + *wp_i = (index + i) * zone_size;
This calculation needs uint64_t to avoid overflowing int. The types
involved are:
uint64_t = (uint32_t + int) * int;
You can fix it using:
*wp_i = ((uint64_t)index + i) * zone_size;
Then the entire expression will be evaluated as a 64-bit integer instead
of a 32-bit integer.
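
A standalone demonstration of the difference (not patch code, values made
up):

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
      uint32_t index = 4;          /* e.g. the fifth zone */
      int i = 0;
      int zone_size = 1 << 30;     /* 1 GiB zones */

      /* evaluated in 32 bits: wraps to 0 before the widening assignment */
      uint64_t wrong = (index + i) * zone_size;
      /* casting one operand first widens the whole expression to 64 bits */
      uint64_t right = ((uint64_t)index + i) * zone_size;

      printf("wrong = %" PRIu64 ", right = %" PRIu64 "\n", wrong, right);
      return 0;
  }

The same treatment applies to the "(index + 1) * s->zoned_header.zone_size"
line in qcow2_finish_zone() above.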
> + ret = qcow2_write_wp_at(bs, wp_i, index + i, BLK_ZS_EMPTY);
> + if (ret < 0) {
> + return ret;
s->wps->colock must be unlocked.
> + }
> + /* clear data */
> + ret = qcow2_co_pwrite_zeroes(bs, qcow2_get_wp(*wp_i), zone_size, 0);
Does zone reset guarantee that the data blocks will be zeroed according
to the zoned storage model?
> + if (ret < 0) {
> + error_report("Failed to reset zone at 0x%" PRIx64 "", *wp_i);
> + }
> + }
> + qemu_co_mutex_unlock(&s->wps->colock);
> + return ret;
> +}
> +
> +static int coroutine_fn qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp
> op,
> + int64_t offset, int64_t len)
> +{
> + BDRVQcow2State *s = bs->opaque;
> + int ret = 0;
> + int64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
> + int64_t zone_size = s->zoned_header.zone_size;
> + int64_t zone_size_mask = zone_size - 1;
> + uint32_t index = offset / zone_size;
> + BlockZoneWps *wps = s->wps;
> +
> + if (offset & zone_size_mask) {
> + error_report("sector offset %" PRId64 " is not aligned to zone size"
> + " %" PRId64 "", offset / 512, zone_size / 512);
> + return -EINVAL;
> + }
> +
> + if (((offset + len) < capacity && len & zone_size_mask) ||
> + offset + len > capacity) {
> + error_report("number of sectors %" PRId64 " is not aligned to zone"
> + " size %" PRId64 "", len / 512, zone_size / 512);
> + return -EINVAL;
> + }
> +
> + qemu_co_mutex_lock(&wps->colock);
> + uint64_t wpv = wps->wp[offset / zone_size];
Use index here instead of recalculating it?
> + if (QCOW2_ZT_IS_CONV(wpv) && len != capacity) {
> + error_report("zone mgmt operations are not allowed for "
> + "conventional zones");
> + ret = -EIO;
> + goto unlock;
> + }
> + qemu_co_mutex_unlock(&wps->colock);
> +
> + switch(op) {
> + case BLK_ZO_OPEN:
> + ret = qcow2_open_zone(bs, index);
> + break;
> + case BLK_ZO_CLOSE:
> + ret = qcow2_close_zone(bs, index);
> + break;
> + case BLK_ZO_FINISH:
> + ret = qcow2_finish_zone(bs, index);
> + break;
> + case BLK_ZO_RESET:
> + ret = qcow2_reset_zone(bs, index, len);
> + break;
> + default:
> + error_report("Unsupported zone op: 0x%x", op);
> + ret = -ENOTSUP;
> + break;
> + }
> + return ret;
> +
> +unlock:
> + qemu_co_mutex_unlock(&wps->colock);
> + return ret;
> +}
> +
> +static int coroutine_fn
> +qcow2_co_zone_append(BlockDriverState *bs, int64_t *offset, QEMUIOVector
> *qiov,
> + BdrvRequestFlags flags)
> +{
> + assert(flags == 0);
> + BDRVQcow2State *s = bs->opaque;
> + int ret;
> + int64_t zone_size_mask = bs->bl.zone_size - 1;
> + int64_t iov_len = 0;
> + int64_t len = 0;
> +
> + /* offset + len should not pass the end of that zone starting from
> offset */
> + if (*offset & zone_size_mask) {
> + error_report("sector offset %" PRId64 " is not aligned to zone size "
> + "%" PRId32 "", *offset / 512, bs->bl.zone_size / 512);
> + return -EINVAL;
> + }
> +
> + int64_t wg = bs->bl.write_granularity;
> + int64_t wg_mask = wg - 1;
> + for (int i = 0; i < qiov->niov; i++) {
> + iov_len = qiov->iov[i].iov_len;
> + if (iov_len & wg_mask) {
> + error_report("len of IOVector[%d] %" PRId64 " is not aligned to "
> + "block size %" PRId64 "", i, iov_len, wg);
> + return -EINVAL;
> + }
> + }
> + len = qiov->size;
> +
> + if ((len >> BDRV_SECTOR_BITS) > bs->bl.max_append_sectors) {
> + return -ENOTSUP;
> + }
> +
> + qemu_co_mutex_lock(&s->wps->colock);
> + uint64_t wp = s->wps->wp[*offset / bs->bl.zone_size];
Where is *offset checked against nr_zones to prevent an access beyond
the end of the array?
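Something like this before taking the lock would do it (sketch, mirrors
the check I suggested for qcow2_co_zone_report()):

  uint32_t index = *offset / bs->bl.zone_size;
  if (index >= bs->bl.nr_zones) {
      error_report("offset %" PRId64 " is beyond the last zone", *offset);
      return -EINVAL;
  }

  qemu_co_mutex_lock(&s->wps->colock);
  uint64_t wp = s->wps->wp[index];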
> + uint64_t wp_i = qcow2_get_wp(wp);
> + ret = qcow2_co_pwritev_part(bs, wp_i, len, qiov, 0, 0);
> + if (ret == 0) {
> + *offset = wp_i;
> + } else {
> + error_report("qcow2: zap failed");
> + }
> +
> + qemu_co_mutex_unlock(&s->wps->colock);
> + return ret;
> +}
> +
> static int coroutine_fn GRAPH_RDLOCK
> qcow2_co_copy_range_from(BlockDriverState *bs,
> BdrvChild *src, int64_t src_offset,
> @@ -6214,6 +6835,10 @@ BlockDriver bdrv_qcow2 = {
> .bdrv_co_pwritev_part = qcow2_co_pwritev_part,
> .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
>
> + .bdrv_co_zone_report = qcow2_co_zone_report,
> + .bdrv_co_zone_mgmt = qcow2_co_zone_mgmt,
> + .bdrv_co_zone_append = qcow2_co_zone_append,
> +
> .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
> .bdrv_co_pdiscard = qcow2_co_pdiscard,
> .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
> diff --git a/block/qcow2.h b/block/qcow2.h
> index fe18dc4d97..a3a96ddbce 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -246,6 +246,8 @@ typedef struct Qcow2ZonedHeaderExtension {
> uint32_t max_active_zones;
> uint32_t max_open_zones;
> uint32_t max_append_sectors;
> + uint64_t zonedmeta_offset;
> + uint64_t zonedmeta_size;
> uint8_t padding[3];
> } QEMU_PACKED Qcow2ZonedHeaderExtension;
>
> --
> 2.40.1
>