On Mon, Sep 09, 2024 at 07:34:52PM +0800, Changqi Lu wrote: > Add reservation acquire, reservation register, > reservation release and reservation report commands > in the nvme device layer. > > By introducing these commands, this enables the nvme > device to perform reservation-related tasks, including > querying keys, querying reservation status, registering > reservation keys, initiating and releasing reservations, > as well as clearing and preempting reservations held by > other keys. > > These commands are crucial for management and control of > shared storage resources in a persistent manner. > Signed-off-by: Changqi Lu <[email protected]> > Signed-off-by: zhenwei pi <[email protected]> > Acked-by: Klaus Jensen <[email protected]> > --- > hw/nvme/ctrl.c | 346 +++++++++++++++++++++++++++++++++++++++++++ > hw/nvme/ns.c | 6 + > hw/nvme/nvme.h | 9 ++ > include/block/nvme.h | 44 ++++++ > 4 files changed, 405 insertions(+) > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > index ad212de723..ba98b86f9c 100644 > --- a/hw/nvme/ctrl.c > +++ b/hw/nvme/ctrl.c > @@ -294,6 +294,10 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { > [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, > [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP, > [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, > + [NVME_CMD_RESV_REGISTER] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_REPORT] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP, > }; > > static const uint32_t nvme_cse_iocs_zoned[256] = { > @@ -308,6 +312,10 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { > [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, > [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, > [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_REGISTER] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_REPORT] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP, > + [NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP, > }; > > static void nvme_process_sq(void *opaque); > @@ -1747,6 +1755,13 @@ static void nvme_aio_err(NvmeRequest *req, int ret) > case NVME_CMD_READ: > status = NVME_UNRECOVERED_READ; > break; > + case NVME_CMD_RESV_REPORT: > + if (ret == -ENOTSUP) { > + status = NVME_INVALID_OPCODE; > + } else { > + status = NVME_UNRECOVERED_READ; > + } > + break; > case NVME_CMD_FLUSH: > case NVME_CMD_WRITE: > case NVME_CMD_WRITE_ZEROES: > @@ -1754,6 +1769,15 @@ static void nvme_aio_err(NvmeRequest *req, int ret) > case NVME_CMD_COPY: > status = NVME_WRITE_FAULT; > break; > + case NVME_CMD_RESV_REGISTER: > + case NVME_CMD_RESV_ACQUIRE: > + case NVME_CMD_RESV_RELEASE: > + if (ret == -ENOTSUP) { > + status = NVME_INVALID_OPCODE; > + } else { > + status = NVME_WRITE_FAULT; > + } > + break; > default: > status = NVME_INTERNAL_DEV_ERROR; > break; > @@ -2692,6 +2716,320 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest > *req) > return NVME_NO_COMPLETE; > } > > +typedef struct NvmeKeyInfo { > + uint64_t cr_key; > + uint64_t nr_key; > +} NvmeKeyInfo; > + > +static uint16_t nvme_resv_register(NvmeCtrl *n, NvmeRequest *req) > +{ > + int ret; > + NvmeKeyInfo key_info; > + NvmeNamespace *ns = req->ns; > + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10); > + bool ignore_key = cdw10 >> 3 & 0x1; > + uint8_t action = cdw10 & 0x7; > + uint8_t ptpl = cdw10 >> 30 & 0x3; > + bool aptpl; > + > + if (!nvme_support_pr(ns)) { > + return NVME_INVALID_OPCODE; > + } > + > + switch (ptpl) { > + case NVME_RESV_PTPL_NO_CHANGE: > + aptpl = (ns->id_ns.rescap & NVME_PR_CAP_PTPL) ? true : false; > + break; > + case NVME_RESV_PTPL_DISABLE: > + aptpl = false; > + break; > + case NVME_RESV_PTPL_ENABLE: > + aptpl = true; > + break; > + default: > + return NVME_INVALID_FIELD; > + } > + > + ret = nvme_h2c(n, (uint8_t *)&key_info, sizeof(NvmeKeyInfo), req); > + if (ret) { > + return ret; > + } > + > + switch (action) { > + case NVME_RESV_REGISTER_ACTION_REGISTER: > + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, 0, > + key_info.nr_key, 0, aptpl,
Is le64_to_cpu() missing on key_info fields?
> + ignore_key, nvme_misc_cb,
> + req);
> + break;
> + case NVME_RESV_REGISTER_ACTION_UNREGISTER:
> + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, key_info.cr_key, 0,
> + 0, aptpl, ignore_key,
> + nvme_misc_cb, req);
> + break;
> + case NVME_RESV_REGISTER_ACTION_REPLACE:
> + req->aiocb = blk_aio_pr_register(ns->blkconf.blk, key_info.cr_key,
> + key_info.nr_key, 0, aptpl,
> ignore_key,
> + nvme_misc_cb, req);
> + break;
> + default:
> + return NVME_INVALID_FIELD;
> + }
> +
> + return NVME_NO_COMPLETE;
> +}
> +
> +static uint16_t nvme_resv_release(NvmeCtrl *n, NvmeRequest *req)
> +{
> + int ret;
> + uint64_t cr_key;
> + NvmeNamespace *ns = req->ns;
> + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10);
> + uint8_t action = cdw10 & 0x7;
> + NvmeResvType type = cdw10 >> 8 & 0xff;
> +
> + if (!nvme_support_pr(ns)) {
> + return NVME_INVALID_OPCODE;
> + }
> +
> + ret = nvme_h2c(n, (uint8_t *)&cr_key, sizeof(cr_key), req);
> + if (ret) {
> + return ret;
> + }
> +
> + switch (action) {
> + case NVME_RESV_RELEASE_ACTION_RELEASE:
> + req->aiocb = blk_aio_pr_release(ns->blkconf.blk, cr_key,
le64_to_cpu() missing on cr_key?
> + nvme_pr_type_to_block(type),
> + nvme_misc_cb, req);
> + break;
> + case NVME_RESV_RELEASE_ACTION_CLEAR:
> + req->aiocb = blk_aio_pr_clear(ns->blkconf.blk, cr_key,
> + nvme_misc_cb, req);
> + break;
> + default:
> + return NVME_INVALID_FIELD;
> + }
> +
> + return NVME_NO_COMPLETE;
> +}
> +
> +static uint16_t nvme_resv_acquire(NvmeCtrl *n, NvmeRequest *req)
> +{
> + int ret;
> + NvmeKeyInfo key_info;
> + NvmeNamespace *ns = req->ns;
> + uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10);
> + uint8_t action = cdw10 & 0x7;
> + NvmeResvType type = cdw10 >> 8 & 0xff;
> +
> + if (!nvme_support_pr(ns)) {
> + return NVME_INVALID_OPCODE;
> + }
> +
> + ret = nvme_h2c(n, (uint8_t *)&key_info, sizeof(NvmeKeyInfo), req);
> + if (ret) {
> + return ret;
> + }
le64_to_cpu() missing on key_info fields?
> +
> + switch (action) {
> + case NVME_RESV_ACQUIRE_ACTION_ACQUIRE:
> + req->aiocb = blk_aio_pr_reserve(ns->blkconf.blk, key_info.cr_key,
> + nvme_pr_type_to_block(type),
> + nvme_misc_cb, req);
> + break;
> + case NVME_RESV_ACQUIRE_ACTION_PREEMPT:
> + req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk,
> + key_info.cr_key, key_info.nr_key,
> + nvme_pr_type_to_block(type),
> + false, nvme_misc_cb, req);
> + break;
> + case NVME_RESV_ACQUIRE_ACTION_PREEMPT_AND_ABORT:
> + req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk, key_info.cr_key,
> + key_info.nr_key, type, true,
> + nvme_misc_cb, req);
> + break;
> + default:
> + return NVME_INVALID_FIELD;
> + }
> +
> + return NVME_NO_COMPLETE;
> +}
> +
> +typedef struct NvmeResvKeys {
> + uint32_t generation;
> + uint32_t num_keys;
> + uint64_t *keys;
> + NvmeRequest *req;
> +} NvmeResvKeys;
> +
> +typedef struct NvmeReadReservation {
> + uint32_t generation;
> + uint64_t key;
> + BlockPrType type;
> + NvmeRequest *req;
> + NvmeResvKeys *keys_info;
> +} NvmeReadReservation;
> +
> +static int nvme_read_reservation_cb(NvmeReadReservation *reservation)
> +{
> + int rc;
> + NvmeReservationStatus *nvme_status;
> + NvmeRequest *req = reservation->req;
> + NvmeCtrl *n = req->sq->ctrl;
> + NvmeResvKeys *keys_info = reservation->keys_info;
> + int len = sizeof(NvmeReservationStatusHeader) +
> + sizeof(NvmeRegisteredCtrl) * keys_info->num_keys;
> +
> + nvme_status = g_malloc(len);
> + nvme_status->header.gen = reservation->generation;
> + nvme_status->header.rtype = block_pr_type_to_nvme(reservation->type);
> + nvme_status->header.regctl = keys_info->num_keys;
> + for (int i = 0; i < keys_info->num_keys; i++) {
> + nvme_status->regctl_ds[i].cntlid = nvme_ctrl(req)->cntlid;
> + nvme_status->regctl_ds[i].rkey = keys_info->keys[i];
> + nvme_status->regctl_ds[i].rcsts = keys_info->keys[i] ==
> + reservation->key ? 1 : 0;
> + /* hostid is not supported currently */
> + memset(&nvme_status->regctl_ds[i].hostid, 0, 8);
> + }
> +
> + rc = nvme_c2h(n, (uint8_t *)nvme_status, len, req);
> + g_free(nvme_status);
> + return rc;
> +}
> +
> +static int nvme_read_reservation_ext_cb(NvmeReadReservation *reservation)
> +{
> + int rc;
> + NvmeReservationStatusExt *nvme_status_ext;
> + NvmeRequest *req = reservation->req;
> + NvmeCtrl *n = req->sq->ctrl;
> + NvmeResvKeys *keys_info = reservation->keys_info;
> + int len = sizeof(NvmeReservationStatusHeader) +
> + sizeof(uint8_t) * 40 +
> + sizeof(NvmeRegisteredCtrlExt) * keys_info->num_keys;
> +
> + nvme_status_ext = g_malloc(len);
> + nvme_status_ext->header.gen = cpu_to_be32(reservation->generation);
> + nvme_status_ext->header.rtype = block_pr_type_to_nvme(reservation->type);
> + nvme_status_ext->header.regctl = cpu_to_be16(keys_info->num_keys);
> +
> + for (int i = 0; i < keys_info->num_keys; i++) {
> + uint16_t ctnlid = nvme_ctrl(req)->cntlid;
> + nvme_status_ext->regctl_eds[i].cntlid = cpu_to_be16(ctnlid);
> + nvme_status_ext->regctl_eds[i].rkey =
> cpu_to_be64(keys_info->keys[i]);
> + nvme_status_ext->regctl_eds[i].rcsts = keys_info->keys[i] ==
> + reservation->key ? 1 : 0;
> + /* hostid is not supported currently */
> + memset(&nvme_status_ext->regctl_eds[i].hostid, 0, 16);
> + }
> +
> + rc = nvme_c2h(n, (uint8_t *)nvme_status_ext, len, req);
> + g_free(nvme_status_ext);
> + return rc;
> +}
> +
> +static void nvme_resv_read_reservation_cb(void *opaque, int ret)
> +{
> + NvmeReadReservation *reservation = opaque;
> + NvmeRequest *req = reservation->req;
> + bool eds = req->cmd.cdw11 & 0x1;
Missing le32_to_cpu()?
> + NvmeResvKeys *keys_info = reservation->keys_info;
> +
> + if (ret < 0) {
> + goto out;
> + }
> +
> + if (eds) {
> + ret = nvme_read_reservation_ext_cb(reservation);
> + } else {
> + ret = nvme_read_reservation_cb(reservation);
> + }
> +
> +out:
> + g_free(keys_info->keys);
> + g_free(keys_info);
> + g_free(reservation);
> + nvme_misc_cb(req, ret);
> +}
> +
> +static void nvme_resv_read_keys_cb(void *opaque, int ret)
> +{
> + NvmeResvKeys *keys_info = opaque;
> + NvmeRequest *req = keys_info->req;
> + NvmeNamespace *ns = req->ns;
> + NvmeReadReservation *reservation;
> +
> + if (ret < 0) {
> + goto out;
> + }
> +
> + keys_info->num_keys = MIN(ret, keys_info->num_keys);
> + reservation = g_new0(NvmeReadReservation, 1);
> + memset(reservation, 0, sizeof(*reservation));
> + reservation->req = req;
> + reservation->keys_info = keys_info;
> +
> + req->aiocb = blk_aio_pr_read_reservation(ns->blkconf.blk,
> + &reservation->generation, &reservation->key,
> + &reservation->type, nvme_resv_read_reservation_cb,
> + reservation);
> + return;
> +
> +out:
> + g_free(keys_info->keys);
> + g_free(keys_info);
> + nvme_misc_cb(req, ret);
> +}
> +
> +
> +static uint16_t nvme_resv_report(NvmeCtrl *n, NvmeRequest *req)
> +{
> + int num_keys;
> + uint32_t cdw10 = req->cmd.cdw10;
> + uint32_t cdw11 = req->cmd.cdw11;
le32_to_cpu() missing on cmd fields?
> + int buflen = (cdw10 + 1) * sizeof(uint32_t);
Please use size_t buflen to avoid 32 vs 64 and signed vs unsigned issues below.
> + bool eds = cdw11 & 0x1;
> + NvmeNamespace *ns = req->ns;
> + NvmeResvKeys *keys_info;
> +
> + if (!nvme_support_pr(ns)) {
> + return NVME_INVALID_OPCODE;
> + }
> +
> + if (eds) {
> + if (buflen < sizeof(NvmeReservationStatusHeader) +
> + sizeof(uint8_t) * 40) {
> + return NVME_INVALID_FIELD;
> + }
> +
> + num_keys = (buflen - sizeof(NvmeReservationStatusHeader) -
> + sizeof(uint8_t) * 40) /
> + sizeof(struct NvmeRegisteredCtrlExt);
> + } else {
> + if (buflen < sizeof(NvmeReservationStatusHeader)) {
> + return NVME_INVALID_FIELD;
> + }
> +
> + num_keys = (buflen - sizeof(NvmeReservationStatusHeader)) /
> + sizeof(struct NvmeRegisteredCtrl);
> + }
Is it possible to validate num_keys against the actual request sg size?
Or maybe there should be a reasonable maximum so that we don't have to
worry about g_malloc() failing and aborting the process, integer
overflows, etc?
> +
> + keys_info = g_new0(NvmeResvKeys, 1);
> + keys_info->generation = 0;
> + /* num_keys is the maximum number of keys that can be transmitted */
> + keys_info->num_keys = num_keys;
> + keys_info->keys = g_malloc(sizeof(uint64_t) * num_keys);
> + keys_info->req = req;
> +
> + req->aiocb = blk_aio_pr_read_keys(ns->blkconf.blk,
> &keys_info->generation,
> + keys_info->num_keys, keys_info->keys,
> + nvme_resv_read_keys_cb, keys_info);
> +
> + return NVME_NO_COMPLETE;
> +}
> +
> typedef struct NvmeCopyAIOCB {
> BlockAIOCB common;
> BlockAIOCB *aiocb;
> @@ -4469,6 +4807,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest
> *req)
> return nvme_dsm(n, req);
> case NVME_CMD_VERIFY:
> return nvme_verify(n, req);
> + case NVME_CMD_RESV_REGISTER:
> + return nvme_resv_register(n, req);
> + case NVME_CMD_RESV_REPORT:
> + return nvme_resv_report(n, req);
> + case NVME_CMD_RESV_ACQUIRE:
> + return nvme_resv_acquire(n, req);
> + case NVME_CMD_RESV_RELEASE:
> + return nvme_resv_release(n, req);
> case NVME_CMD_COPY:
> return nvme_copy(n, req);
> case NVME_CMD_ZONE_MGMT_SEND:
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index a5c903d727..833bcbae08 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -60,6 +60,12 @@ void nvme_ns_init_format(NvmeNamespace *ns)
>
> blk_pr_cap = blk_bs(ns->blkconf.blk)->bl.pr_cap;
> id_ns->rescap = block_pr_cap_to_nvme(blk_pr_cap);
> + if (id_ns->rescap != NVME_PR_CAP_ALL &&
> + id_ns->rescap != NVME_PR_CAP_RW) {
> +
> + /* Rescap either supports all or none of them */
> + id_ns->rescap = 0;
> + }
> }
>
> static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> index 6d0e456348..23d7173a36 100644
> --- a/hw/nvme/nvme.h
> +++ b/hw/nvme/nvme.h
> @@ -470,6 +470,10 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
> case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
> case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
> case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
> + case NVME_CMD_RESV_REGISTER: return "NVME_CMD_RESV_REGISTER";
> + case NVME_CMD_RESV_REPORT: return "NVME_CMD_RESV_REPORT";
> + case NVME_CMD_RESV_ACQUIRE: return "NVME_CMD_RESV_ACQUIRE";
> + case NVME_CMD_RESV_RELEASE: return "NVME_CMD_RESV_RELEASE";
> default: return "NVME_NVM_CMD_UNKNOWN";
> }
> }
> @@ -558,6 +562,11 @@ static inline uint8_t block_pr_cap_to_nvme(uint8_t
> block_pr_cap)
> return res;
> }
>
> +static inline bool nvme_support_pr(NvmeNamespace *ns)
> +{
> + return (ns->id_ns.rescap & NVME_PR_CAP_RW) == NVME_PR_CAP_RW;
> +}
> +
> typedef struct NvmeSQueue {
> struct NvmeCtrl *ctrl;
> uint16_t sqid;
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 9b9eaeb3a7..2eec339028 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -692,6 +692,13 @@ typedef enum NVMEPrCap {
> NVME_PR_CAP_WR_EX_AR = 1 << 5,
> /* Exclusive Access All Registrants reservation type */
> NVME_PR_CAP_EX_AC_AR = 1 << 6,
> + /* Write and Read reservation type */
> + NVME_PR_CAP_RW = (NVME_PR_CAP_WR_EX |
> + NVME_PR_CAP_EX_AC |
> + NVME_PR_CAP_WR_EX_RO |
> + NVME_PR_CAP_EX_AC_RO |
> + NVME_PR_CAP_WR_EX_AR |
> + NVME_PR_CAP_EX_AC_AR),
>
> NVME_PR_CAP_ALL = (NVME_PR_CAP_PTPL |
> NVME_PR_CAP_WR_EX |
> @@ -702,6 +709,43 @@ typedef enum NVMEPrCap {
> NVME_PR_CAP_EX_AC_AR),
> } NvmePrCap;
>
> +typedef struct QEMU_PACKED NvmeRegisteredCtrl {
> + uint16_t cntlid;
> + uint8_t rcsts;
> + uint8_t rsvd3[5];
> + uint8_t hostid[8];
> + uint64_t rkey;
> +} NvmeRegisteredCtrl;
> +
> +typedef struct QEMU_PACKED NvmeRegisteredCtrlExt {
> + uint16_t cntlid;
> + uint8_t rcsts;
> + uint8_t rsvd3[5];
> + uint64_t rkey;
> + uint8_t hostid[16];
> + uint8_t rsvd32[32];
> +} NvmeRegisteredCtrlExt;
> +
> +typedef struct QEMU_PACKED NvmeReservationStatusHeader {
> + uint32_t gen;
> + uint8_t rtype;
> + uint16_t regctl;
> + uint16_t resv5;
> + uint8_t ptpls;
> + uint8_t resv10[14];
> +} NvmeReservationStatusHeader;
> +
> +typedef struct QEMU_PACKED NvmeReservationStatus {
> + struct NvmeReservationStatusHeader header;
> + struct NvmeRegisteredCtrl regctl_ds[];
> +} NvmeReservationStatus;
> +
> +typedef struct QEMU_PACKED NvmeReservationStatusExt {
> + struct NvmeReservationStatusHeader header;
> + uint8_t rsvd24[40];
> + struct NvmeRegisteredCtrlExt regctl_eds[];
> +} NvmeReservationStatusExt;
> +
> typedef struct QEMU_PACKED NvmeDeleteQ {
> uint8_t opcode;
> uint8_t flags;
> --
> 2.20.1
>
signature.asc
Description: PGP signature
