> -----Original Message----- > From: dev <dev-boun...@dpdk.org> On Behalf Of David Marchand > Sent: Tuesday, July 2, 2019 6:18 PM > To: dev@dpdk.org > Cc: anatoly.bura...@intel.com; alex.william...@redhat.com; > maxime.coque...@redhat.com; tho...@monjalon.net; > step...@networkplumber.org > Subject: [EXT] [dpdk-dev] [RFC PATCH] vfio: move eventfd/interrupt pairing at > setup time > > > ---------------------------------------------------------------------- > Populating the eventfd in rte_intr_enable in each request to vfio triggers a > reconfiguration of the interrupt handler on the kernel side. > The problem is that rte_intr_enable is often used to re-enable masked > interrupts > from drivers interrupt handlers. > > This reconfiguration leaves a window during which a device could send an > interrupt and then the kernel logs this unsolicited interrupt: > [158764.159833] do_IRQ: 9.34 No irq handler for vector > > VFIO api makes it possible to set the fd at setup time. > Make use of this and then we only need to ask for masking/unmasking legacy > interrupts and we have nothing to do for MSI/MSIX. > > "rxtx" interrupts are left untouched but are most likely subject to the same > issue. > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1654824 > Signed-off-by: David Marchand <david.march...@redhat.com>
Thanks David for this patch. I have tested this patch with FastLinq adapters on which original issue is reported. Tested this with MSI-x and legacy interrupt mode. Tested-by: Shahed Shaikh <shsha...@marvell.com> > --- > Sending as a RFC patch since it is not trivial and can eat your babies. > Could people that know well this parts have a look at this? > Thanks! > > --- > drivers/bus/pci/linux/pci_vfio.c | 78 ++++++------ > lib/librte_eal/linux/eal/eal_interrupts.c | 197 > +++++++----------------------- > 2 files changed, 86 insertions(+), 189 deletions(-) > > diff --git a/drivers/bus/pci/linux/pci_vfio.c > b/drivers/bus/pci/linux/pci_vfio.c > index ebf6ccd..e4c32c4 100644 > --- a/drivers/bus/pci/linux/pci_vfio.c > +++ b/drivers/bus/pci/linux/pci_vfio.c > @@ -186,8 +186,11 @@ > static int > pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) { > - int i, ret, intr_idx; > + char irq_set_buf[sizeof(struct vfio_irq_set) + sizeof(int)]; > + struct vfio_irq_set *irq_set; > enum rte_intr_mode intr_mode; > + int i, ret, intr_idx; > + int fd; > > /* default to invalid index */ > intr_idx = VFIO_PCI_NUM_IRQS; > @@ -219,7 +222,6 @@ > /* start from MSI-X interrupt type */ > for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { > struct vfio_irq_info irq = { .argsz = sizeof(irq) }; > - int fd = -1; > > /* skip interrupt modes we don't want */ > if (intr_mode != RTE_INTR_MODE_NONE && @@ -235,51 > +237,51 @@ > return -1; > } > > + /* found a usable interrupt mode */ > + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) != 0) > + break; > + > /* if this vector cannot be used with eventfd, fail if we > explicitly > * specified interrupt type, otherwise continue */ > - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { > - if (intr_mode != RTE_INTR_MODE_NONE) { > - RTE_LOG(ERR, EAL, > - " interrupt vector does not > support eventfd!\n"); > - return -1; > - } else > - continue; > - } > - > - /* set up an eventfd for interrupts */ > - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > - if (fd < 0) { > - RTE_LOG(ERR, EAL, " cannot set up eventfd, " > - "error %i (%s)\n", errno, > strerror(errno)); > + if (intr_mode != RTE_INTR_MODE_NONE) { > + RTE_LOG(ERR, EAL, " interrupt vector does not support > eventfd!\n"); > return -1; > } > + } > > - dev->intr_handle.fd = fd; > - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; > + if (i < 0) > + return -1; > > - switch (i) { > - case VFIO_PCI_MSIX_IRQ_INDEX: > - intr_mode = RTE_INTR_MODE_MSIX; > - dev->intr_handle.type = > RTE_INTR_HANDLE_VFIO_MSIX; > - break; > - case VFIO_PCI_MSI_IRQ_INDEX: > - intr_mode = RTE_INTR_MODE_MSI; > - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; > - break; > - case VFIO_PCI_INTX_IRQ_INDEX: > - intr_mode = RTE_INTR_MODE_LEGACY; > - dev->intr_handle.type = > RTE_INTR_HANDLE_VFIO_LEGACY; > - break; > - default: > - RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); > - return -1; > - } > + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > + if (fd < 0) { > + RTE_LOG(ERR, EAL, " cannot set up eventfd, error %i (%s)\n", > + errno, strerror(errno)); > + return -1; > + } > > - return 0; > + irq_set = (struct vfio_irq_set *)irq_set_buf; > + irq_set->argsz = sizeof(irq_set_buf); > + irq_set->flags = > VFIO_IRQ_SET_DATA_EVENTFD|VFIO_IRQ_SET_ACTION_TRIGGER; > + irq_set->index = i; > + irq_set->start = 0; > + irq_set->count = 1; > + memcpy(&irq_set->data, &fd, sizeof(int)); > + if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { > + RTE_LOG(ERR, EAL, " error configuring interrupt\n"); > + close(fd); > + return -1; > } > > - /* if we're here, we haven't found a suitable interrupt vector */ > - return -1; > + dev->intr_handle.fd = fd; > + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; > + if (i == VFIO_PCI_MSIX_IRQ_INDEX) > + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; > + else if (i == VFIO_PCI_MSI_IRQ_INDEX) > + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; > + else if (i == VFIO_PCI_INTX_IRQ_INDEX) > + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; > + > + return 0; > } > > #ifdef HAVE_VFIO_DEV_REQ_INTERFACE > diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c > b/lib/librte_eal/linux/eal/eal_interrupts.c > index 79ad5e8..27976b3 100644 > --- a/lib/librte_eal/linux/eal/eal_interrupts.c > +++ b/lib/librte_eal/linux/eal/eal_interrupts.c > @@ -109,42 +109,19 @@ struct rte_intr_source { > > /* enable legacy (INTx) interrupts */ > static int > -vfio_enable_intx(const struct rte_intr_handle *intr_handle) { > - struct vfio_irq_set *irq_set; > - char irq_set_buf[IRQ_SET_BUF_LEN]; > - int len, ret; > - int *fd_ptr; > - > - len = sizeof(irq_set_buf); > - > - /* enable INTx */ > - irq_set = (struct vfio_irq_set *) irq_set_buf; > - irq_set->argsz = len; > - irq_set->count = 1; > - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | > VFIO_IRQ_SET_ACTION_TRIGGER; > - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > - irq_set->start = 0; > - fd_ptr = (int *) &irq_set->data; > - *fd_ptr = intr_handle->fd; > - > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > - > - if (ret) { > - RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd > %d\n", > - intr_handle->fd); > - return -1; > - } > +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { > + struct vfio_irq_set irq_set; > + int ret; > > - /* unmask INTx after enabling */ > - memset(irq_set, 0, len); > - len = sizeof(struct vfio_irq_set); > - irq_set->argsz = len; > - irq_set->count = 1; > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_UNMASK; > - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > - irq_set->start = 0; > + /* unmask INTx */ > + irq_set.argsz = sizeof(irq_set); > + irq_set.flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_UNMASK; > + irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; > + irq_set.start = 0; > + irq_set.count = 1; > > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); > > if (ret) { > RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd > %d\n", @@ -156,128 +133,51 @@ struct rte_intr_source { > > /* disable legacy (INTx) interrupts */ > static int > -vfio_disable_intx(const struct rte_intr_handle *intr_handle) { > - struct vfio_irq_set *irq_set; > - char irq_set_buf[IRQ_SET_BUF_LEN]; > - int len, ret; > - > - len = sizeof(struct vfio_irq_set); > +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { > + struct vfio_irq_set irq_set; > + int ret; > > - /* mask interrupts before disabling */ > - irq_set = (struct vfio_irq_set *) irq_set_buf; > - irq_set->argsz = len; > - irq_set->count = 1; > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_MASK; > - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > - irq_set->start = 0; > + /* mask interrupts */ > + irq_set.argsz = sizeof(irq_set); > + irq_set.flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_MASK; > + irq_set.index = VFIO_PCI_INTX_IRQ_INDEX; > + irq_set.start = 0; > + irq_set.count = 1; > > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); > > if (ret) { > RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", > intr_handle->fd); > return -1; > } > - > - /* disable INTx*/ > - memset(irq_set, 0, len); > - irq_set->argsz = len; > - irq_set->count = 0; > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_TRIGGER; > - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; > - irq_set->start = 0; > - > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > - > - if (ret) { > - RTE_LOG(ERR, EAL, > - "Error disabling INTx interrupts for fd %d\n", > intr_handle->fd); > - return -1; > - } > - return 0; > -} > - > -/* enable MSI interrupts */ > -static int > -vfio_enable_msi(const struct rte_intr_handle *intr_handle) { > - int len, ret; > - char irq_set_buf[IRQ_SET_BUF_LEN]; > - struct vfio_irq_set *irq_set; > - int *fd_ptr; > - > - len = sizeof(irq_set_buf); > - > - irq_set = (struct vfio_irq_set *) irq_set_buf; > - irq_set->argsz = len; > - irq_set->count = 1; > - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | > VFIO_IRQ_SET_ACTION_TRIGGER; > - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; > - irq_set->start = 0; > - fd_ptr = (int *) &irq_set->data; > - *fd_ptr = intr_handle->fd; > - > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > - > - if (ret) { > - RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", > - intr_handle->fd); > - return -1; > - } > return 0; > } > > -/* disable MSI interrupts */ > -static int > -vfio_disable_msi(const struct rte_intr_handle *intr_handle) { > - struct vfio_irq_set *irq_set; > - char irq_set_buf[IRQ_SET_BUF_LEN]; > - int len, ret; > - > - len = sizeof(struct vfio_irq_set); > - > - irq_set = (struct vfio_irq_set *) irq_set_buf; > - irq_set->argsz = len; > - irq_set->count = 0; > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_TRIGGER; > - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; > - irq_set->start = 0; > - > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > - > - if (ret) > - RTE_LOG(ERR, EAL, > - "Error disabling MSI interrupts for fd %d\n", > intr_handle->fd); > - > - return ret; > -} > - > /* enable MSI-X interrupts */ > static int > -vfio_enable_msix(const struct rte_intr_handle *intr_handle) { > - int len, ret; > +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { > char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; > struct vfio_irq_set *irq_set; > - int *fd_ptr; > + int len, ret; > + > + if (intr_handle->nb_efd == 0) > + return 0; > > len = sizeof(irq_set_buf); > > irq_set = (struct vfio_irq_set *) irq_set_buf; > irq_set->argsz = len; > - /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ > - irq_set->count = intr_handle->max_intr ? > - (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? > - RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; > irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | > VFIO_IRQ_SET_ACTION_TRIGGER; > irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; > - irq_set->start = 0; > - fd_ptr = (int *) &irq_set->data; > - /* INTR vector offset 0 reserve for non-efds mapping */ > - fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; > - memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, > - sizeof(*intr_handle->efds) * intr_handle->nb_efd); > + irq_set->start = RTE_INTR_VEC_RXTX_OFFSET; > + irq_set->count = intr_handle->nb_efd; > + memcpy(&irq_set->data, intr_handle->efds, > + sizeof(*intr_handle->efds) * intr_handle->nb_efd); > > ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > - > if (ret) { > RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd > %d\n", > intr_handle->fd); > @@ -289,22 +189,21 @@ struct rte_intr_source { > > /* disable MSI-X interrupts */ > static int > -vfio_disable_msix(const struct rte_intr_handle *intr_handle) { > - struct vfio_irq_set *irq_set; > - char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; > - int len, ret; > - > - len = sizeof(struct vfio_irq_set); > +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { > + struct vfio_irq_set irq_set; > + int ret; > > - irq_set = (struct vfio_irq_set *) irq_set_buf; > - irq_set->argsz = len; > - irq_set->count = 0; > - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_TRIGGER; > - irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; > - irq_set->start = 0; > + if (intr_handle->nb_efd == 0) > + return 0; > > - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); > + irq_set.argsz = sizeof(irq_set); > + irq_set.flags = VFIO_IRQ_SET_DATA_NONE | > VFIO_IRQ_SET_ACTION_TRIGGER; > + irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX; > + irq_set.start = RTE_INTR_VEC_RXTX_OFFSET; > + irq_set.count = intr_handle->nb_efd; > > + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set); > if (ret) > RTE_LOG(ERR, EAL, > "Error disabling MSI-X interrupts for fd %d\n", > intr_handle->fd); @@ -665,9 +564,7 @@ struct rte_intr_source { > return -1; > break; > case RTE_INTR_HANDLE_VFIO_MSI: > - if (vfio_enable_msi(intr_handle)) > - return -1; > - break; > + return 0; > case RTE_INTR_HANDLE_VFIO_LEGACY: > if (vfio_enable_intx(intr_handle)) > return -1; > @@ -721,9 +618,7 @@ struct rte_intr_source { > return -1; > break; > case RTE_INTR_HANDLE_VFIO_MSI: > - if (vfio_disable_msi(intr_handle)) > - return -1; > - break; > + return 0; > case RTE_INTR_HANDLE_VFIO_LEGACY: > if (vfio_disable_intx(intr_handle)) > return -1; > -- > 1.8.3.1