On Jun 27 13:47, Niklas Cassel wrote: > TP4084 adds a new mode, CC.CRIME, that can be used to mark a namespace > as ready independently from the controller. > > When CC.CRIME is 0 (default), things behave as before, all namespaces > are ready when CSTS.RDY gets set to 1. > > When CC.CRIME is 1, the controller will become ready when CSTS.RDY gets > set to 1, but commands accessing a namespace are allowed to return > "Namespace Not Ready" or "Admin Command Media Not Ready". > After CRTO.CRWMT amount of time, if the namespace has not yet been > marked ready, the status codes also need to have the DNR bit set. > > Add a new "ready_delay" namespace device parameter, in order to emulate > different ready latencies for namespaces. > > Once a namespace is ready, it will set the NRDY bit in the I/O Command > Set Independent Identify Namespace Data Structure, and then send out a > Namespace Attribute Changed event. > > This new "ready_delay" is supported on controllers not part of a NVMe > subsystem. The reasons are many. One problem is that multiple controllers > can have different CC.CRIME modes running. Another problem is the extra > locking needed. The third problem is when to actually clear NRDY. If we > assume that a namespace clears NRDY when it no longer has any controller > online for that namespace. The problem then is that Linux will reset the > controllers one by one during probe time. The reset goes so fast so that > there is no time when all controllers are in reset at the same time, so > NRDY will never get cleared. (The controllers are enabled by SeaBIOS by > default.) We could introduce a reset_time param, but this would only > increase the chances that all controllers are in reset at the same time. > > Signed-off-by: Niklas Cassel <[email protected]> > --- > hw/nvme/ctrl.c | 123 +++++++++++++++++++++++++++++++++++++++++-- > hw/nvme/ns.c | 18 +++++++ > hw/nvme/nvme.h | 6 +++ > hw/nvme/trace-events | 1 + > include/block/nvme.h | 60 ++++++++++++++++++++- > 5 files changed, 204 insertions(+), 4 deletions(-) > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > index 8ca824ea14..5404f67480 100644 > --- a/hw/nvme/ctrl.c > +++ b/hw/nvme/ctrl.c > @@ -88,6 +88,12 @@ > * completion when there are no outstanding AERs. When the maximum number > of > * enqueued events are reached, subsequent events will be dropped. > * > + * - `crwmt` > + * This is the Controller Ready With Media Timeout (CRWMT) field that is > + * defined in the CRTO register. This specifies the worst-case time that > host > + * software should wait for the controller and all attached namespaces to > + * become ready. The value is in units of 500 milliseconds. > + * > * - `mdts` > * Indicates the maximum data transfer size for a command that transfers > data > * between host-accessible memory and the controller. The value is > specified > @@ -157,6 +163,11 @@ > * namespace will be available in the subsystem but not attached to any > * controllers. > * > + * - `ready_delay` > + * This parameter specifies the amount of time that the namespace should > wait > + * before being marked ready. Only applicable if CC.CRIME is set by the > user. > + * The value is in units of 500 milliseconds (to be consistent with > `crwmt`). > + * > * Setting `zoned` to true selects Zoned Command Set at the namespace. > * In this case, the following namespace properties are available to > configure > * zoned operation: > @@ -216,6 +227,8 @@ > #define NVME_VF_RES_GRANULARITY 1 > #define NVME_VF_OFFSET 0x1 > #define NVME_VF_STRIDE 1 > +#define NVME_DEFAULT_CRIMT 0xa > +#define NVME_DEFAULT_CRWMT 0xf > > #define NVME_GUEST_ERR(trace, fmt, ...) \ > do { \ > @@ -4188,6 +4201,10 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest > *req) > return NVME_INVALID_OPCODE | NVME_DNR; > } > > + if (!(ns->id_indep_ns.nstat & NVME_NSTAT_NRDY)) { > + return NVME_NS_NOT_READY; > + } > +
I'd add a ns->ready bool instead. See below for my previously posted
patch.
> if (ns->status) {
> return ns->status;
> }
> @@ -4791,6 +4808,27 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n,
> NvmeRequest *req, bool active)
> return NVME_INVALID_CMD_SET | NVME_DNR;
> }
>
> +static uint16_t nvme_identify_cs_indep_ns(NvmeCtrl *n, NvmeRequest *req)
> +{
> + NvmeNamespace *ns;
> + NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
> + uint32_t nsid = le32_to_cpu(c->nsid);
> +
> + trace_pci_nvme_identify_cs_indep_ns(nsid);
> +
> + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
> + return NVME_INVALID_NSID | NVME_DNR;
> + }
> +
> + ns = nvme_ns(n, nsid);
> + if (unlikely(!ns)) {
> + return nvme_rpt_empty_id_struct(n, req);
> + }
> +
> + return nvme_c2h(n, (uint8_t *)&ns->id_indep_ns, sizeof(NvmeIdNsCsIndep),
> + req);
> +}
> +
I posted a patch for this some time ago
https://lore.kernel.org/qemu-devel/[email protected]/
The structure is so simple that we can just "build" it here instead of
adding yet another (mostly empty) 4k member to the NvmeNamespace struct.
> static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
> bool attached)
> {
> @@ -5081,6 +5119,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest
> *req)
> return nvme_identify_ns(n, req, true);
> case NVME_ID_CNS_NS_PRESENT:
> return nvme_identify_ns(n, req, false);
> + case NVME_ID_CNS_CS_INDEPENDENT_NS:
> + return nvme_identify_cs_indep_ns(n, req);
> case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST:
> return nvme_identify_ctrl_list(n, req, true);
> case NVME_ID_CNS_CTRL_LIST:
> @@ -5556,6 +5596,44 @@ static void nvme_select_iocs_ns(NvmeCtrl *n,
> NvmeNamespace *ns)
> }
> }
>
> +void nvme_ns_ready_cb(void *opaque)
> +{
> + NvmeNamespace *ns = opaque;
> + DeviceState *dev = &ns->parent_obj;
> + BusState *s = qdev_get_parent_bus(dev);
> + NvmeCtrl *n = NVME(s->parent);
> +
> + ns->id_indep_ns.nstat |= NVME_NSTAT_NRDY;
> +
> + if (!test_and_set_bit(ns->params.nsid, n->changed_nsids)) {
> + nvme_enqueue_event(n, NVME_AER_TYPE_NOTICE,
> + NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
> + NVME_LOG_CHANGED_NSLIST);
> + }
> +}
Move to hw/nvme/ns.c.
> +
> +static void nvme_set_ready_or_start_timer(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> + int64_t expire_time;
> +
> + if (!NVME_CC_CRIME(ldl_le_p(&n->bar.cc)) || ns->params.ready_delay == 0)
> {
> + ns->id_indep_ns.nstat |= NVME_NSTAT_NRDY;
> + return;
> + }
> +
> + expire_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> + expire_time += ns->params.ready_delay * 500;
> + timer_mod(ns->ready_delay_timer, expire_time);
> +}
This can be made independent of NvmeCtrl by passing the CRIME value (and
then moved to hw/nvme/ns.c).
> +
> +static inline void nvme_ns_clear_ready_and_stop_timer(NvmeNamespace *ns)
> +{
> + if (!ns->subsys) {
> + timer_del(ns->ready_delay_timer);
> + ns->id_indep_ns.nstat &= ~NVME_NSTAT_NRDY;
> + }
> +}
> +
Move to hw/nvme/ns.c.
signature.asc
Description: PGP signature
