On Wed, May 08, 2024 at 05:36:25PM +0800, Changqi Lu wrote: > Add persistent reservation in/out operations in the > SCSI device layer. By introducing the persistent > reservation in/out api, this enables the SCSI device > to perform reservation-related tasks, including querying > keys, querying reservation status, registering reservation > keys, initiating and releasing reservations, as well as > clearing and preempting reservations held by other keys. > > These operations are crucial for management and control of > shared storage resources in a persistent manner. > > Signed-off-by: Changqi Lu <[email protected]> > Signed-off-by: zhenwei pi <[email protected]> > --- > hw/scsi/scsi-disk.c | 302 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 302 insertions(+)
Paolo: Please review for buffer overflows. I find the buffer size
assumption in the SCSI layer mysterious (e.g. scsi_req_get_buf() returns
a void* and it's not obvious how large the buffer is), so I didn't check
for buffer overflows.
>
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..bdd66c4026 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
> #include "migration/vmstate.h"
> #include "hw/scsi/emulation.h"
> #include "scsi/constants.h"
> +#include "scsi/utils.h"
> #include "sysemu/block-backend.h"
> #include "sysemu/blockdev.h"
> #include "hw/block/block.h"
> @@ -1474,6 +1475,296 @@ static void scsi_disk_emulate_read_data(SCSIRequest
> *req)
> scsi_req_complete(&r->req, GOOD);
> }
>
> +typedef struct SCSIPrReadKeys {
> + uint32_t generation;
> + uint32_t num_keys;
> + uint64_t *keys;
> + void *req;
> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> + uint32_t generation;
> + uint64_t key;
> + BlockPrType type;
> + void *req;
> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> + int num_keys;
> + uint8_t *buf;
> + SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> + SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> + assert(blk_get_aio_context(s->qdev.conf.blk) ==
> + qemu_get_current_aio_context());
> +
> + assert(r->req.aiocb != NULL);
> + r->req.aiocb = NULL;
> +
> + if (scsi_disk_req_check_error(r, ret, true)) {
> + goto done;
> + }
> +
> + buf = scsi_req_get_buf(&r->req);
> + num_keys = MIN(blk_keys->num_keys, ret);
> + blk_keys->generation = cpu_to_be32(blk_keys->generation);
> + memcpy(&buf[0], &blk_keys->generation, 4);
> + for (int i = 0; i < num_keys; i++) {
> + blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> + memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8);
> + }
> + num_keys = cpu_to_be32(num_keys * 8);
> + memcpy(&buf[4], &num_keys, 4);
> +
> + scsi_req_data(&r->req, r->buflen);
> +done:
> + scsi_req_unref(&r->req);
> + g_free(blk_keys->keys);
> + g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> + SCSIPrReadKeys *blk_keys;
> + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> + int buflen = MIN(r->req.cmd.xfer, r->buflen);
> + int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
> +
> + blk_keys = g_new0(SCSIPrReadKeys, 1);
> + blk_keys->generation = 0;
> + /* num_keys is the maximum number of keys that can be transmitted */
> + blk_keys->num_keys = num_keys;
> + blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> + blk_keys->req = r;
> +
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk,
> &blk_keys->generation,
> + blk_keys->num_keys, blk_keys->keys,
> + scsi_pr_read_keys_complete,
> blk_keys);
> + return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> + uint8_t *buf;
> + uint32_t num_keys = 0;
> + SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> + SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> + assert(blk_get_aio_context(s->qdev.conf.blk) ==
> + qemu_get_current_aio_context());
> +
> + assert(r->req.aiocb != NULL);
> + r->req.aiocb = NULL;
> +
> + if (scsi_disk_req_check_error(r, ret, true)) {
> + goto done;
> + }
> +
> + buf = scsi_req_get_buf(&r->req);
> + blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> + memcpy(&buf[0], &blk_rsv->generation, 4);
> + if (ret) {
> + num_keys = cpu_to_be32(16);
The SPC-6 calls this field Additional Length, which is clearer than
num_keys (there is only 1 key here!). Could you rename it to
additional_len to avoid confusion?
> + blk_rsv->key = cpu_to_be64(blk_rsv->key);
> + memcpy(&buf[8], &blk_rsv->key, 8);
> + buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
> + } else {
> + num_keys = cpu_to_be32(0);
> + }
> +
> + memcpy(&buf[4], &num_keys, 4);
> + scsi_req_data(&r->req, r->buflen);
> +
> +done:
> + scsi_req_unref(&r->req);
> + g_free(blk_rsv);
> +}
> +
> +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req)
> +{
> + SCSIPrReadReservation *blk_rsv;
> + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + blk_rsv = g_malloc(sizeof(*blk_rsv));
> + blk_rsv->generation = 0;
> + blk_rsv->key = 0;
> + blk_rsv->type = 0;
> + blk_rsv->req = r;
> +
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk,
> + &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type,
> + scsi_pr_read_reservation_complete, blk_rsv);
> + return 0;
> +}
> +
> +static void scsi_aio_pr_complete(void *opaque, int ret)
> +{
> + SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> + /* The request must only run in the BlockBackend's AioContext */
> + assert(blk_get_aio_context(s->qdev.conf.blk) ==
> + qemu_get_current_aio_context());
> +
> + assert(r->req.aiocb != NULL);
> + r->req.aiocb = NULL;
> +
> + if (scsi_disk_req_check_error(r, ret, true)) {
> + goto done;
> + }
> +
> + scsi_req_complete(&r->req, GOOD);
> +
> +done:
> + scsi_req_unref(&r->req);
> +}
> +
> +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t old_key,
> + uint64_t new_key, SCSIPrType type,
> + bool ignore_key)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, old_key, new_key,
> + scsi_pr_type_to_block(type),
> + ignore_key, scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t key,
> + SCSIPrType type)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, key,
> + scsi_pr_type_to_block(type),
> + scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t key,
> + SCSIPrType type)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, key,
> + scsi_pr_type_to_block(type),
> + scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t key)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, key,
> + scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t new_key,
> + uint64_t old_key, SCSIPrType type,
> + bool abort)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, new_key, old_key,
> + scsi_pr_type_to_block(type), abort,
> + scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_in(SCSIRequest *req)
> +{
> + int rc;
> + SCSIPrInAction action = req->cmd.buf[1] & 0x1f;
> +
> + switch (action) {
> + case SCSI_PR_IN_READ_KEYS:
> + rc = scsi_disk_emulate_pr_read_keys(req);
> + break;
> + case SCSI_PR_IN_READ_RESERVATION:
> + rc = scsi_disk_emulate_pr_read_reservation(req);
> + break;
> + default:
> + return -ENOTSUP;
> + }
> +
> + return rc;
> +}
> +
> +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf)
> +{
> + int rc;
> + uint64_t old_key, new_key;
> + SCSIPrOutAction action;
> + SCSIPrScope scope;
> + SCSIPrType type;
> + SCSIRequest *req = &r->req;
> +
> + memcpy(&old_key, &inbuf[0], 8);
> + old_key = be64_to_cpu(old_key);
> + memcpy(&new_key, &inbuf[8], 8);
> + new_key = be64_to_cpu(new_key);
> + action = req->cmd.buf[1] & 0x1f;
> + scope = (req->cmd.buf[2] >> 4) & 0x0f;
> + type = req->cmd.buf[2] & 0x0f;
> +
> + if (scope != SCSI_PR_LU_SCOPE) {
> + return -ENOTSUP;
> + }
> +
> + switch (action) {
> + case SCSI_PR_OUT_REGISTER:
> + rc = scsi_disk_emulate_pr_register(r, old_key, new_key, type, false);
> + break;
> + case SCSI_PR_OUT_REG_AND_IGNORE_KEY:
> + rc = scsi_disk_emulate_pr_register(r, old_key, new_key, type, true);
> + break;
> + case SCSI_PR_OUT_RESERVE:
> + rc = scsi_disk_emulate_pr_reserve(r, old_key, type);
> + break;
> + case SCSI_PR_OUT_RELEASE:
> + rc = scsi_disk_emulate_pr_release(r, old_key, type);
> + break;
> + case SCSI_PR_OUT_CLEAR:
> + rc = scsi_disk_emulate_pr_clear(r, old_key);
> + break;
> + case SCSI_PR_OUT_PREEMPT:
> + rc = scsi_disk_emulate_pr_preempt(r, old_key, new_key, type, false);
> + break;
> + case SCSI_PR_OUT_PREEMPT_AND_ABORT:
> + rc = scsi_disk_emulate_pr_preempt(r, old_key, new_key, type, true);
> + break;
> + default:
> + return -ENOTSUP;
> + }
> +
> + return rc;
> +}
> +
> static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
> uint8_t *inbuf, int inlen)
> {
> @@ -1957,6 +2248,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest
> *req)
> scsi_req_complete(&r->req, GOOD);
> break;
>
> + case PERSISTENT_RESERVE_OUT:
> + scsi_disk_emulate_pr_out(r, r->iov.iov_base);
> + break;
> default:
> abort();
> }
> @@ -2213,6 +2507,12 @@ static int32_t scsi_disk_emulate_command(SCSIRequest
> *req, uint8_t *buf)
> case FORMAT_UNIT:
> trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
> break;
> + case PERSISTENT_RESERVE_OUT:
> + break;
> + case PERSISTENT_RESERVE_IN:
> + scsi_req_ref(&r->req);
Please add a comment indicating which scsi_req_unref() this ref is
paired with. That makes it easier to understand request reference
counting.
> + scsi_disk_emulate_pr_in(req);
> + return 0;
> default:
> trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
> scsi_command_name(buf[0]));
> @@ -2632,6 +2932,8 @@ static const SCSIReqOps *const
> scsi_disk_reqops_dispatch[256] = {
> [VERIFY_12] = &scsi_disk_emulate_reqops,
> [VERIFY_16] = &scsi_disk_emulate_reqops,
> [FORMAT_UNIT] = &scsi_disk_emulate_reqops,
> + [PERSISTENT_RESERVE_IN] = &scsi_disk_emulate_reqops,
> + [PERSISTENT_RESERVE_OUT] = &scsi_disk_emulate_reqops,
>
> [READ_6] = &scsi_disk_dma_reqops,
> [READ_10] = &scsi_disk_dma_reqops,
> --
> 2.20.1
>
signature.asc
Description: PGP signature
