Am 27.02.2013 01:47, schrieb Keith Busch: > NVM Express is an open standard for PCI-e attached Non-Volatile Memory > storage. This commit adds an emulated device that supports the register > interface and command set defined by this standard. The standard can > be viewed at nvmexpress.org. This initial commit implements the minimum > amount required for an nvme device to work with existing block drivers. > > Cc: Keith Busch <keith.bu...@gmail.com> > Signed-off-by: Keith Busch <keith.bu...@intel.com> > ---
Sorry, I never got around to finishing review of v1, some more comments inline: [...] > diff --git a/hw/nvme.c b/hw/nvme.c > new file mode 100644 > index 0000000..dfc93ee > --- /dev/null > +++ b/hw/nvme.c [...] > +static int nvme_init(PCIDevice *pci_dev) > +{ > + NvmeCtrl *n = DO_UPCAST(NvmeCtrl, dev, pci_dev); Please don't use DO_UPCAST() for QOM objects, introduce a QOM cast macro NVME(obj) instead. We've collected such review comments here: http://wiki.qemu.org/QOMConventions Not mentioned there but in hw/qdev-core.h: We've prepared some new infrastructure since your original submission. Could you please split your nvme_init into a non-failing trivial .instance_init void nvme_init(Object *obj) and a second-stage int nvme_initfn(PCIDevice *pci_dev) that we can later easily change to void nvme_realize(DeviceState *dev, Error **errp) ? > + NvmeIdCtrl *id = &n->id_ctrl; > + uint8_t *pci_conf; > + int64_t bs_size; > + int i, j; > + > + if (!n->conf.bs) { > + return -1; > + } > + > + bs_size = bdrv_getlength(n->conf.bs); Accidental extra space? > + if (bs_size <= 0) { > + return -1; > + } > + > + pci_conf = pci_dev->config; > + pci_conf[PCI_INTERRUPT_PIN] = 1; > + pci_config_set_prog_interface(pci_dev->config, 0x2); > + pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); > + > + n->num_namespaces = 1; > + n->num_queues = 64; > + n->max_q_ents = 0x7ff; > + n->reg_size = 1 << qemu_fls(0x1004 + 2 * (n->num_queues + 1) * 4); > + n->ns_size = bs_size / n->num_namespaces; > + > + n->instance = instance++; > + n->namespaces = g_malloc0(sizeof(*n->namespaces)*n->num_namespaces); > + n->sq = g_malloc0(sizeof(*n->sq)*n->num_queues); > + n->cq = g_malloc0(sizeof(*n->cq)*n->num_queues); > + > + id->vid = PCI_VENDOR_ID_INTEL; > + id->ssvid = 0x0111; > + id->rab = 6; > + id->ieee[0] = 0x00; > + id->ieee[1] = 0x02; > + id->ieee[2] = 0xb3; > + id->sqes = 0xf << 4 | 0x6; > + id->cqes = 0xf << 4 | 0x4; I assume you've verified operator precedence does what you want, but adding parentheses around the shift would make that clear to everyone. > + id->nn = n->num_namespaces; > + id->vwc = 1; > + snprintf((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl"); > + snprintf((char *)id->fr, sizeof(id->fr), "1.0"); > + snprintf((char *)id->sn, sizeof(id->sn), "NVMeQx10%02x", n->instance); Does this linear instance counting cope with hot-plug and hot-unplug as expected? > + id->psd[0].mp = 0x9c4; > + id->psd[0].enlat = 0x10; > + id->psd[0].exlat = 0x4; > + > + n->bar.cap = (uint64_t)(n->max_q_ents & CAP_MQES_MASK) << > CAP_MQES_SHIFT; > + n->bar.cap |= (uint64_t)(1 & CAP_CQR_MASK) << CAP_CQR_SHIFT; > + n->bar.cap |= (uint64_t)(1 & CAP_AMS_MASK) << CAP_AMS_SHIFT; > + n->bar.cap |= (uint64_t)(0xf & CAP_TO_MASK) << CAP_TO_SHIFT; > + n->bar.cap |= (uint64_t)(1 & CAP_CSS_MASK) << CAP_CSS_SHIFT; > + n->bar.cap |= (uint64_t)(0xf & CAP_MPSMAX_MASK) << CAP_MPSMAX_SHIFT; > + n->bar.vs = 0x00010001; > + n->bar.intmc = n->bar.intms = 0; > + > + memory_region_init_io(&n->iomem, &nvme_mmio_ops, n, "nvme-mmio", > + n->reg_size); > + pci_register_bar(&n->dev, 0, > + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, > + &n->iomem); > + msix_init_exclusive_bar(&n->dev, n->num_queues, 4); > + > + for (i = 0; i < n->num_namespaces; i++) { > + NvmeNamespace *ns = &n->namespaces[i]; > + NvmeIdNs *id_ns = &ns->id_ns; > + id_ns->ncap = id_ns->nsze = (n->ns_size) >> 9; > + id_ns->nlbaf = 0x4; > + > + for (j = 0; j <= id_ns->nlbaf; j++) { > + id_ns->lbaf[j].ds = 9 + j; > + } > + ns->id = i + 1; > + ns->ctrl = n; > + ns->start_block = id_ns->nsze * i; > + } > + > + return 0; > +} > + > +static void nvme_exit(PCIDevice *pci_dev) > +{ > + NvmeCtrl *n = DO_UPCAST(NvmeCtrl, dev, pci_dev); NVME(pci_dev) > + nvme_clear_ctrl(n); > + g_free(n->namespaces); > + g_free(n->cq); > + g_free(n->sq); > + msix_uninit_exclusive_bar(pci_dev); > + memory_region_destroy(&n->iomem); > +} Split off .instance_finalize function matching .instance_init / pc->init split above if applicable? > + > +static void nvme_reset(DeviceState *dev) > +{ > + NvmeCtrl *n = DO_UPCAST(NvmeCtrl, dev.qdev, dev); > + (void)n; Add NvmeCtrl *n = NVME(dev) once needed only? BTW please name NvmeCtrl's dev field parent_obj, then you easily see where it is still used outside VMState. > +} > + > +static Property nvme_props[] = { > + DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), > + DEFINE_PROP_END_OF_LIST(), > +}; > + > +static const VMStateDescription nvme_vmstate = { > + .name = "nvme", > + .version_id = 1, > + .minimum_version_id = 1, > + .minimum_version_id_old = 1, > + .fields = (VMStateField[]) { > + VMSTATE_PCI_DEVICE(dev, NvmeCtrl), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +static void nvme_class_init(ObjectClass *oc, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(oc); > + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); > + > + pc->init = nvme_init; > + pc->exit = nvme_exit; > + pc->class_id = PCI_CLASS_STORAGE_EXPRESS; > + pc->vendor_id = PCI_VENDOR_ID_INTEL; > + pc->device_id = 0x0111; > + pc->revision = 1; > + > + dc->desc = "Non-Volatile Memory Express"; > + dc->reset = nvme_reset; > + dc->props = nvme_props; > + dc->vmsd = &nvme_vmstate; > +} > + > +static TypeInfo nvme_info = { static const please, all in-tree devices have been updated recently. > + .name = "nvme", > + .parent = TYPE_PCI_DEVICE, > + .instance_size = sizeof(NvmeCtrl), > + .class_init = nvme_class_init, > +}; > + > +static void nvme_register_devices(void) > +{ > + type_register_static(&nvme_info); > +} > +type_init(nvme_register_devices); Please drop semicolon, it's not a statement, and rename to nvme_register_types (had been cleaned up early 2012). Also please add a white line before type_init() and whenever there's more than one line of statements in a function to separate the variable declaration block. > diff --git a/hw/nvme.h b/hw/nvme.h > new file mode 100644 > index 0000000..5716f51 > --- /dev/null > +++ b/hw/nvme.h > @@ -0,0 +1,666 @@ > +#ifndef _NVME_H > +#define _NVME_H C99 is said to reserve leading underscores, suggest HW_NVME_H if you want to distinguish from potential system/library header. [...] > diff --git a/hw/pci/pci-hotplug.c b/hw/pci/pci-hotplug.c > index f38df30..992db47 100644 > --- a/hw/pci/pci-hotplug.c > +++ b/hw/pci/pci-hotplug.c > @@ -84,8 +84,8 @@ static int scsi_hot_add(Monitor *mon, DeviceState *adapter, > object_dynamic_cast(OBJECT(QLIST_FIRST(&adapter->child_bus)), > TYPE_SCSI_BUS); > if (!scsibus) { > - error_report("Device is not a SCSI adapter"); > - return -1; > + error_report("Device is not a SCSI adapter"); > + return -1; > } > > /* [snip] Unrelated whitespace cleanup hunk? Regards, Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg