On Wed, Jan 28, 2015 at 3:34 PM, Michael S. Tsirkin <m...@redhat.com> wrote: > I had to drop the dpdk mailing list from Cc. > Added qemu mailing list, please copy patches there > in the future. > > On Mon, Jan 26, 2015 at 11:20:36AM +0800, Huawei Xie wrote: >> >> Signed-off-by: Huawei Xie <huawei....@intel.com> > > Overall, I think it's a reasonable implementation. > Some comments below: > >> --- >> lib/librte_vhost/Makefile | 5 +- >> lib/librte_vhost/vhost-net.h | 4 + >> lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 12 +- >> lib/librte_vhost/vhost_user/fd_man.c | 4 +- >> lib/librte_vhost/vhost_user/vhost-net-user.c | 428 >> ++++++++++++++++++++++++++ >> lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++ >> lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++ >> lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++ >> lib/librte_vhost/virtio-net.c | 26 +- >> lib/librte_vhost/virtio-net.h | 43 +++ >> 10 files changed, 865 insertions(+), 18 deletions(-) >> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c >> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h >> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c >> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h >> create mode 100644 lib/librte_vhost/virtio-net.h >> >> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile >> index e0d0ef6..b2f14a0 100644 >> --- a/lib/librte_vhost/Makefile >> +++ b/lib/librte_vhost/Makefile >> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk >> # library name >> LIB = librte_vhost.a >> >> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 >> -D_FILE_OFFSET_BITS=64 -lfuse >> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 >> -D_FILE_OFFSET_BITS=64 -lfuse >> LDFLAGS += -lfuse >> # all source are stored in SRCS-y >> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c >> vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c >> 
+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c >> vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c >> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c >> vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c >> >> # install includes >> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h >> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h >> index 11737cc..3f18f25 100644 >> --- a/lib/librte_vhost/vhost-net.h >> +++ b/lib/librte_vhost/vhost-net.h >> @@ -41,8 +41,12 @@ >> >> #include <rte_log.h> >> >> +#include "rte_virtio_net.h" >> + >> #define VHOST_MEMORY_MAX_NREGIONS 8 >> >> +extern struct vhost_net_device_ops const *ops; >> + >> /* Macros for printing using RTE_LOG */ >> #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 >> #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 >> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c >> b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c >> index edcbc10..1d2c403 100644 >> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c >> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c >> @@ -50,8 +50,7 @@ >> #include "rte_virtio_net.h" >> #include "vhost-net.h" >> #include "virtio-net-cdev.h" >> - >> -extern struct vhost_net_device_ops const *ops; >> +#include "virtio-net.h" >> >> /* Line size for reading maps file. 
*/ >> static const uint32_t BUFSIZE = PATH_MAX; >> @@ -268,6 +267,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, >> struct vhost_memory_region *mem_regions = (void *)(uintptr_t) >> ((uint64_t)(uintptr_t)mem_regions_addr + size); >> uint64_t base_address = 0, mapped_address, mapped_size; >> + struct virtio_net *dev; >> >> for (idx = 0; idx < nregions; idx++) { >> regions[idx].guest_phys_address = >> @@ -335,6 +335,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, >> regions[idx].guest_phys_address; >> } >> >> + dev = get_device(ctx); >> + if (dev && dev->mem && dev->mem->mapped_address) { >> + munmap((void *)(uintptr_t)dev->mem->mapped_address, >> + (size_t)dev->mem->mapped_size); >> + free(dev->mem); >> + dev->mem = NULL; >> + } >> + >> ops->set_mem_table(ctx, &regions[0], valid_regions); >> return 0; >> } >> diff --git a/lib/librte_vhost/vhost_user/fd_man.c >> b/lib/librte_vhost/vhost_user/fd_man.c >> index 09187e0..0d2beb9 100644 >> --- a/lib/librte_vhost/vhost_user/fd_man.c >> +++ b/lib/librte_vhost/vhost_user/fd_man.c >> @@ -72,7 +72,7 @@ fdset_find_free_slot(struct fdset *pfdset) >> >> static void >> fdset_add_fd(struct fdset *pfdset, int idx, int fd, >> - fd_cb rcb, fd_cb wcb, uint64_t dat) >> + fd_cb rcb, fd_cb wcb, void *dat) >> { >> struct fdentry *pfdentry; >> >> @@ -138,7 +138,7 @@ fdset_init(struct fdset *pfdset) >> * Register the fd in the fdset with read/write handler and context. >> */ >> int >> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat) >> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) >> { >> int i; >> >> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c >> b/lib/librte_vhost/vhost_user/vhost-net-user.c >> new file mode 100644 >> index 0000000..c84fd3b >> --- /dev/null >> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c >> @@ -0,0 +1,428 @@ >> +/*- >> + * BSD LICENSE >> + * >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. >> + * All rights reserved. 
>> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * >> + * * Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * * Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in >> + * the documentation and/or other materials provided with the >> + * distribution. >> + * * Neither the name of Intel Corporation nor the names of its >> + * contributors may be used to endorse or promote products derived >> + * from this software without specific prior written permission. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
>> + */ >> + >> +#include <stdint.h> >> +#include <stdio.h> >> +#include <limits.h> >> +#include <stdlib.h> >> +#include <unistd.h> >> +#include <string.h> >> +#include <sys/types.h> >> +#include <sys/socket.h> >> +#include <sys/un.h> >> +#include <errno.h> >> + >> +#include <rte_log.h> >> +#include <rte_virtio_net.h> >> + >> +#include "fd_man.h" >> +#include "vhost-net-user.h" >> +#include "vhost-net.h" >> +#include "virtio-net-user.h" >> + >> +static void vserver_new_vq_conn(int fd, void *data); >> +static void vserver_message_handler(int fd, void *dat); >> +struct vhost_net_device_ops const *ops; >> + >> +static struct vhost_server *g_vhost_server; >> + >> +static const char *vhost_message_str[VHOST_USER_MAX] = { >> + [VHOST_USER_NONE] = "VHOST_USER_NONE", >> + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", >> + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", >> + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", >> + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", >> + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", >> + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", >> + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", >> + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", >> + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", >> + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", >> + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", >> + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", >> + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", >> + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR" >> +}; >> + >> +/** >> + * Create a unix domain socket, bind to path and listen for connection. 
>> + * @return >> + * socket fd or -1 on failure >> + */ >> +static int >> +uds_socket(const char *path) >> +{ >> + struct sockaddr_un un; >> + int sockfd; >> + int ret; >> + >> + if (path == NULL) >> + return -1; >> + >> + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); >> + if (sockfd < 0) >> + return -1; >> + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd); >> + >> + memset(&un, 0, sizeof(un)); >> + un.sun_family = AF_UNIX; >> + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); >> + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un)); >> + if (ret == -1) >> + goto err; >> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); >> + >> + ret = listen(sockfd, 1); >> + if (ret == -1) >> + goto err; >> + >> + return sockfd; >> + >> +err: >> + close(sockfd); >> + return -1; >> +} >> + >> +/* return bytes# of read on success or negative val on failure. */ >> +static int >> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) >> +{ >> + struct iovec iov; >> + struct msghdr msgh = { 0 }; >> + size_t fdsize = fd_num * sizeof(int); >> + char control[CMSG_SPACE(fdsize)]; >> + struct cmsghdr *cmsg; >> + int ret; >> + >> + iov.iov_base = buf; >> + iov.iov_len = buflen; >> + >> + msgh.msg_iov = &iov; >> + msgh.msg_iovlen = 1; >> + msgh.msg_control = control; >> + msgh.msg_controllen = sizeof(control); >> + >> + ret = recvmsg(sockfd, &msgh, 0); >> + if (ret <= 0) { >> + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); >> + return ret; >> + } >> + >> + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { >> + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n"); >> + return -1; >> + } >> + >> + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; >> + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { >> + if ((cmsg->cmsg_level == SOL_SOCKET) && >> + (cmsg->cmsg_type == SCM_RIGHTS)) { >> + memcpy(fds, CMSG_DATA(cmsg), fdsize); >> + break; >> + } >> + } >> + >> + return ret; >> +} >> + >> +/* return bytes# of read on success or negative val on failure. 
*/ >> +static int >> +read_vhost_message(int sockfd, struct VhostUserMsg *msg) >> +{ >> + int ret; >> + >> + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, >> + msg->fds, VHOST_MEMORY_MAX_NREGIONS); >> + if (ret <= 0) >> + return ret; >> + >> + if (msg && msg->size) { >> + if (msg->size > sizeof(msg->payload)) { >> + RTE_LOG(ERR, VHOST_CONFIG, >> + "invalid msg size: %d\n", msg->size); >> + return -1; >> + } >> + ret = read(sockfd, &msg->payload, msg->size); >> + if (ret <= 0) >> + return ret; >> + if (ret != (int)msg->size) { >> + RTE_LOG(ERR, VHOST_CONFIG, >> + "read control message failed\n"); >> + return -1; >> + } >> + } >> + >> + return ret; >> +} >> + >> +static int >> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) >> +{ >> + >> + struct iovec iov; >> + struct msghdr msgh = { 0 }; >> + size_t fdsize = fd_num * sizeof(int); >> + char control[CMSG_SPACE(fdsize)]; >> + struct cmsghdr *cmsg; >> + int ret; >> + >> + iov.iov_base = buf; >> + iov.iov_len = buflen; >> + >> + msgh.msg_iov = &iov; >> + msgh.msg_iovlen = 1; >> + >> + if (fds && fd_num > 0) { >> + msgh.msg_control = control; >> + msgh.msg_controllen = sizeof(control); >> + cmsg = CMSG_FIRSTHDR(&msgh); >> + cmsg->cmsg_len = CMSG_LEN(fdsize); >> + cmsg->cmsg_level = SOL_SOCKET; >> + cmsg->cmsg_type = SCM_RIGHTS; >> + memcpy(CMSG_DATA(cmsg), fds, fdsize); >> + } else { >> + msgh.msg_control = NULL; >> + msgh.msg_controllen = 0; >> + } >> + >> + do { >> + ret = sendmsg(sockfd, &msgh, 0); >> + } while (ret < 0 && errno == EINTR); >> + >> + if (ret < 0) { >> + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); >> + return ret; >> + } >> + >> + return ret; >> +} >> + >> +static int >> +send_vhost_message(int sockfd, struct VhostUserMsg *msg) >> +{ >> + int ret; >> + >> + if (!msg) >> + return 0; >> + >> + msg->flags &= ~VHOST_USER_VERSION_MASK; >> + msg->flags |= VHOST_USER_VERSION; >> + msg->flags |= VHOST_USER_REPLY_MASK; >> + >> + ret = send_fd_message(sockfd, (char 
*)msg, >> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0); >> + >> + return ret; >> +} >> + >> +/* call back when there is new virtio connection. */ >> +static void >> +vserver_new_vq_conn(int fd, void *dat) >> +{ >> + struct vhost_server *vserver = (struct vhost_server *)dat; >> + int conn_fd; >> + int fh; >> + struct vhost_device_ctx vdev_ctx = { 0 }; >> + >> + conn_fd = accept(fd, NULL, NULL); >> + RTE_LOG(INFO, VHOST_CONFIG, >> + "new virtio connection is %d\n", conn_fd); >> + if (conn_fd < 0) >> + return; >> + >> + fh = ops->new_device(vdev_ctx); >> + if (fh == -1) { >> + close(conn_fd); >> + return; >> + } >> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh); >> + >> + fdset_add(&vserver->fdset, >> + conn_fd, vserver_message_handler, NULL, (void *)fh); >> +} >> + >> +/* callback when there is message on the connfd */ >> +static void >> +vserver_message_handler(int connfd, void *dat) >> +{ >> + struct vhost_device_ctx ctx; >> + uint32_t fh = (uint32_t)dat; >> + struct VhostUserMsg msg; >> + uint64_t features; >> + int ret; >> + >> + ctx.fh = fh; >> + ret = read_vhost_message(connfd, &msg); >> + if (ret < 0) { >> + RTE_LOG(ERR, VHOST_CONFIG, >> + "vhost read message failed\n"); >> + >> + close(connfd); >> + fdset_del(&g_vhost_server->fdset, connfd); >> + ops->destroy_device(ctx); >> + >> + return; >> + } else if (ret == 0) { >> + RTE_LOG(INFO, VHOST_CONFIG, >> + "vhost peer closed\n"); >> + >> + close(connfd); >> + fdset_del(&g_vhost_server->fdset, connfd); >> + ops->destroy_device(ctx); >> + >> + return; >> + } >> + if (msg.request > VHOST_USER_MAX) { >> + RTE_LOG(ERR, VHOST_CONFIG, >> + "vhost read incorrect message\n"); >> + >> + close(connfd); >> + fdset_del(&g_vhost_server->fdset, connfd); >> + >> + return; >> + } >> + >> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", >> + vhost_message_str[msg.request]); >> + switch (msg.request) { >> + case VHOST_USER_GET_FEATURES: >> + ret = ops->get_features(ctx, &features); >> + msg.payload.u64 = 
features; >> + msg.size = sizeof(msg.payload.u64); >> + send_vhost_message(connfd, &msg); > > What if this fails (e.g. remote died)? > How will everything be cleaned up? > >> + break; >> + case VHOST_USER_SET_FEATURES: >> + features = msg.payload.u64; >> + ops->set_features(ctx, &features); >> + break; >> + >> + case VHOST_USER_SET_OWNER: >> + ops->set_owner(ctx); >> + break; >> + case VHOST_USER_RESET_OWNER: >> + ops->reset_owner(ctx); >> + break; >> + >> + case VHOST_USER_SET_MEM_TABLE: >> + user_set_mem_table(ctx, &msg); >> + break; >> + >> + case VHOST_USER_SET_LOG_BASE: >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); >> + case VHOST_USER_SET_LOG_FD: >> + close(msg.fds[0]); >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); >> + break; >> + >> + case VHOST_USER_SET_VRING_NUM: >> + ops->set_vring_num(ctx, &msg.payload.state); >> + break; >> + case VHOST_USER_SET_VRING_ADDR: >> + ops->set_vring_addr(ctx, &msg.payload.addr); >> + break; >> + case VHOST_USER_SET_VRING_BASE: >> + ops->set_vring_base(ctx, &msg.payload.state); >> + break; >> + >> + case VHOST_USER_GET_VRING_BASE: >> + ret = user_get_vring_base(ctx, &msg.payload.state); >> + msg.size = sizeof(msg.payload.state); >> + send_vhost_message(connfd, &msg); >> + break; >> + >> + case VHOST_USER_SET_VRING_KICK: >> + user_set_vring_kick(ctx, &msg); >> + break; >> + case VHOST_USER_SET_VRING_CALL: >> + user_set_vring_call(ctx, &msg); >> + break; >> + >> + case VHOST_USER_SET_VRING_ERR: >> + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) >> + close(msg.fds[0]); >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); >> + break; >> + >> + default: >> + break; >> + >> + } >> +} >> + >> + >> +/** >> + * Creates and initialise the vhost server. 
>> + */ >> +int >> +rte_vhost_driver_register(const char *path) >> +{ >> + >> + struct vhost_server *vserver; >> + >> + if (g_vhost_server != NULL) >> + return -1; >> + >> + vserver = calloc(sizeof(struct vhost_server), 1); >> + if (vserver == NULL) >> + return -1; >> + >> + fdset_init(&vserver->fdset); >> + >> + unlink(path); >> + >> + vserver->listenfd = uds_socket(path); >> + if (vserver->listenfd < 0) { >> + free(vserver); >> + return -1; >> + } >> + vserver->path = path; >> + >> + fdset_add(&vserver->fdset, vserver->listenfd, >> + vserver_new_vq_conn, NULL, >> + vserver); >> + >> + ops = get_virtio_net_callbacks(); >> + >> + g_vhost_server = vserver; >> + >> + return 0; >> +} >> + >> + >> +int >> +rte_vhost_driver_session_start(void) >> +{ >> + fdset_event_dispatch(&g_vhost_server->fdset); >> + return 0; >> +} >> + >> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h >> b/lib/librte_vhost/vhost_user/vhost-net-user.h >> new file mode 100644 >> index 0000000..7e6cda4 >> --- /dev/null >> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h >> @@ -0,0 +1,108 @@ >> +/*- >> + * BSD LICENSE >> + * >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * >> + * * Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * * Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in >> + * the documentation and/or other materials provided with the >> + * distribution. >> + * * Neither the name of Intel Corporation nor the names of its >> + * contributors may be used to endorse or promote products derived >> + * from this software without specific prior written permission. 
>> + * >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> + >> +#ifndef _VHOST_NET_USER_H >> +#define _VHOST_NET_USER_H >> + >> +#include <stdint.h> >> +#include <linux/vhost.h> >> + >> +#include "fd_man.h" >> + >> +struct vhost_server { >> + const char *path; /**< The path the uds is bind to. */ >> + int listenfd; /**< The listener sockfd. */ >> + struct fdset fdset; /**< The fd list this vhost server manages. 
*/ >> +}; >> + >> +/* refer to hw/virtio/vhost-user.c */ >> + >> +#define VHOST_MEMORY_MAX_NREGIONS 8 >> + >> +typedef enum VhostUserRequest { >> + VHOST_USER_NONE = 0, >> + VHOST_USER_GET_FEATURES = 1, >> + VHOST_USER_SET_FEATURES = 2, >> + VHOST_USER_SET_OWNER = 3, >> + VHOST_USER_RESET_OWNER = 4, >> + VHOST_USER_SET_MEM_TABLE = 5, >> + VHOST_USER_SET_LOG_BASE = 6, >> + VHOST_USER_SET_LOG_FD = 7, >> + VHOST_USER_SET_VRING_NUM = 8, >> + VHOST_USER_SET_VRING_ADDR = 9, >> + VHOST_USER_SET_VRING_BASE = 10, >> + VHOST_USER_GET_VRING_BASE = 11, >> + VHOST_USER_SET_VRING_KICK = 12, >> + VHOST_USER_SET_VRING_CALL = 13, >> + VHOST_USER_SET_VRING_ERR = 14, >> + VHOST_USER_MAX >> +} VhostUserRequest; >> + >> +typedef struct VhostUserMemoryRegion { >> + uint64_t guest_phys_addr; >> + uint64_t memory_size; >> + uint64_t userspace_addr; >> + uint64_t mmap_offset; >> +} VhostUserMemoryRegion; >> + >> +typedef struct VhostUserMemory { >> + uint32_t nregions; >> + uint32_t padding; >> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; >> +} VhostUserMemory; >> + >> +typedef struct VhostUserMsg { >> + VhostUserRequest request; >> + >> +#define VHOST_USER_VERSION_MASK (0x3) >> +#define VHOST_USER_REPLY_MASK (0x1 << 2) >> + uint32_t flags; >> + uint32_t size; /* the following payload size */ >> + union { >> +#define VHOST_USER_VRING_IDX_MASK (0xff) >> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) >> + uint64_t u64; >> + struct vhost_vring_state state; >> + struct vhost_vring_addr addr; >> + VhostUserMemory memory; >> + } payload; >> + int fds[VHOST_MEMORY_MAX_NREGIONS]; >> +} __attribute((packed)) VhostUserMsg; >> + >> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64) >> + >> +/* The version of the protocol we support */ >> +#define VHOST_USER_VERSION (0x1) >> + >> +/*****************************************************************************/ >> +#endif >> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c >> 
b/lib/librte_vhost/vhost_user/virtio-net-user.c >> new file mode 100644 >> index 0000000..6601fcd >> --- /dev/null >> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c >> @@ -0,0 +1,205 @@ >> +/*- >> + * BSD LICENSE >> + * >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * >> + * * Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * * Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in >> + * the documentation and/or other materials provided with the >> + * distribution. >> + * * Neither the name of Intel Corporation nor the names of its >> + * contributors may be used to endorse or promote products derived >> + * from this software without specific prior written permission. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
>> + */ >> + >> +#include <stdint.h> >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <unistd.h> >> +#include <sys/mman.h> >> + >> +#include <rte_log.h> >> + >> +#include "virtio-net.h" >> +#include "virtio-net-user.h" >> +#include "vhost-net-user.h" >> +#include "vhost-net.h" >> + >> +int >> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) >> +{ >> + unsigned int idx; >> + struct VhostUserMemory memory = pmsg->payload.memory; >> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS]; >> + uint64_t mapped_address, base_address = 0; >> + >> + for (idx = 0; idx < memory.nregions; idx++) { >> + if (memory.regions[idx].guest_phys_addr == 0) >> + base_address = memory.regions[idx].userspace_addr; >> + } >> + if (base_address == 0) { >> + RTE_LOG(ERR, VHOST_CONFIG, >> + "couldn't find the mem region whose GPA is 0.\n"); >> + return -1; >> + } >> + >> + for (idx = 0; idx < memory.nregions; idx++) { >> + regions[idx].guest_phys_address = >> + memory.regions[idx].guest_phys_addr; >> + regions[idx].guest_phys_address_end = >> + memory.regions[idx].guest_phys_addr + >> + memory.regions[idx].memory_size; >> + regions[idx].memory_size = memory.regions[idx].memory_size; >> + regions[idx].userspace_address = >> + memory.regions[idx].userspace_addr; >> + >> + /* This is ugly */ >> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL, >> + regions[idx].memory_size + >> + memory.regions[idx].mmap_offset, >> + PROT_READ | PROT_WRITE, MAP_SHARED, >> + pmsg->fds[idx], >> + 0); >> + RTE_LOG(INFO, VHOST_CONFIG, >> + "mapped region %d to %p\n", >> + idx, (void *)mapped_address); >> + >> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) { >> + RTE_LOG(ERR, VHOST_CONFIG, >> + "mmap qemu guest failed.\n"); >> + return -1; >> + } >> + >> + mapped_address += memory.regions[idx].mmap_offset; >> + >> + regions[idx].address_offset = mapped_address - >> + regions[idx].guest_phys_address; >> + LOG_DEBUG(VHOST_CONFIG, >> + "REGION: %u GPA: %p QEMU 
VA: %p SIZE (%"PRIu64")\n", >> + idx, >> + (void *)(uintptr_t)regions[idx].guest_phys_address, >> + (void *)(uintptr_t)regions[idx].userspace_address, >> + regions[idx].memory_size); >> + } >> + ops->set_mem_table(ctx, regions, memory.nregions); >> + return 0; >> +} >> + >> + >> +static int >> +virtio_is_ready(struct virtio_net *dev) >> +{ >> + struct vhost_virtqueue *rvq, *tvq; >> + >> + /* mq support in future.*/ >> + rvq = dev->virtqueue[VIRTIO_RXQ]; >> + tvq = dev->virtqueue[VIRTIO_TXQ]; >> + if (rvq && tvq && rvq->desc && tvq->desc && >> + (rvq->kickfd != (eventfd_t)-1) && >> + (rvq->callfd != (eventfd_t)-1) && >> + (tvq->kickfd != (eventfd_t)-1) && >> + (tvq->callfd != (eventfd_t)-1)) { >> + RTE_LOG(INFO, VHOST_CONFIG, >> + "virtio is now ready for processing.\n"); >> + return 1; >> + } >> + RTE_LOG(INFO, VHOST_CONFIG, >> + "virtio isn't ready for processing.\n"); >> + return 0; >> +} >> + >> +void >> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) >> +{ >> + struct vhost_vring_file file; >> + >> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; >> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) >> + file.fd = -1; >> + else >> + file.fd = pmsg->fds[0]; >> + RTE_LOG(INFO, VHOST_CONFIG, >> + "vring call idx:%d file:%d\n", file.index, file.fd); >> + ops->set_vring_call(ctx, &file); >> +} >> + >> + >> +/* >> + * In vhost-user, when we receive kick message, will test whether virtio >> + * device is ready for packet processing. 
>> + */ >> +void >> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) >> +{ >> + struct vhost_vring_file file; >> + struct virtio_net *dev = get_device(ctx); >> + >> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; >> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) >> + file.fd = -1; >> + else >> + file.fd = pmsg->fds[0]; >> + RTE_LOG(INFO, VHOST_CONFIG, >> + "vring kick idx:%d file:%d\n", file.index, file.fd); >> + ops->set_vring_kick(ctx, &file); >> + >> + if (virtio_is_ready(dev) && >> + !(dev->flags & VIRTIO_DEV_RUNNING)) >> + notify_ops->new_device(dev); >> + >> +} >> + >> +/* >> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message. >> + */ >> +int >> +user_get_vring_base(struct vhost_device_ctx ctx, >> + struct vhost_vring_state *state) >> +{ >> + struct virtio_net *dev = get_device(ctx); >> + >> + /* We have to stop the queue (virtio) if it is running. */ >> + if (dev->flags & VIRTIO_DEV_RUNNING) >> + notify_ops->destroy_device(dev); >> + >> + /* Here we are safe to get the last used index */ >> + ops->get_vring_base(ctx, state->index, state); >> + >> + RTE_LOG(INFO, VHOST_CONFIG, >> + "vring base idx:%d file:%d\n", state->index, state->num); >> + /* >> + * Based on current qemu vhost-user implementation, this message is >> + * sent and only sent in vhost_vring_stop. >> + * TODO: cleanup the vring, it isn't usable since here. >> + */ > > Please don't tie yourself to a current qemu implementation. Please just > extend qemu to send explicit start/stop messages. > You'll need to negotiate the new capabilities. > > > Nikolay, it seems that version field is only 2 bits. > how can we extend it cleanly?
Will something like this do: #define VHOST_USER_VERSION_MASK_MAJ (0x3) #define VHOST_USER_REPLY_MASK (0x1<<2) #define VHOST_USER_VERSION_MASK_MIN (0xf<<3) The "major" part of the version will be increased in case of significant changes in the protocol, and the "minor" part in all other cases. I guess this will give us enough space for versioning. Regards, Nikolay Nikolaev > > Perhaps, add a new GET_PROTOCOL message for exchanging vhost user > specific bits, then remote should set a high version bit to let qemu > know it's supported? > > > > >> + if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) { >> + close(dev->virtqueue[VIRTIO_RXQ]->callfd); >> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1; >> + } >> + if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) { >> + close(dev->virtqueue[VIRTIO_TXQ]->callfd); >> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1; >> + } >> + >> + return 0; >> + >> +} >> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h >> b/lib/librte_vhost/vhost_user/virtio-net-user.h >> new file mode 100644 >> index 0000000..0f6a75a >> --- /dev/null >> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h >> @@ -0,0 +1,48 @@ >> +/*- >> + * BSD LICENSE >> + * >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * >> + * * Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * * Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in >> + * the documentation and/or other materials provided with the >> + * distribution. 
>> + * * Neither the name of Intel Corporation nor the names of its >> + * contributors may be used to endorse or promote products derived >> + * from this software without specific prior written permission. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> + >> +#ifndef _VIRTIO_NET_USER_H >> +#define _VIRTIO_NET_USER_H >> + >> +#include "vhost-net.h" >> +#include "vhost-net-user.h" >> + >> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *); >> + >> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *); >> + >> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *); >> + >> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state >> *); >> + >> +#endif >> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c >> index 57a5801..c458ed9 100644 >> --- a/lib/librte_vhost/virtio-net.c >> +++ b/lib/librte_vhost/virtio-net.c >> @@ -50,6 +50,7 @@ >> #include <rte_virtio_net.h> >> >> #include "vhost-net.h" >> +#include "virtio-net.h" >> >> /* >> * Device linked list structure for configuration. 
>> @@ -60,7 +61,7 @@ struct virtio_net_config_ll { >> }; >> >> /* device ops to add/remove device to/from data core. */ >> -static struct virtio_net_device_ops const *notify_ops; >> +struct virtio_net_device_ops const *notify_ops; >> /* root address of the linked list of managed virtio devices */ >> static struct virtio_net_config_ll *ll_root; >> >> @@ -88,8 +89,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va) >> if ((qemu_va >= region->userspace_address) && >> (qemu_va <= region->userspace_address + >> region->memory_size)) { >> - vhost_va = dev->mem->mapped_address + qemu_va - >> - dev->mem->base_address; >> + vhost_va = qemu_va + region->guest_phys_address + >> + region->address_offset - >> + region->userspace_address; >> break; >> } >> } >> @@ -119,7 +121,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx) >> * Searches the configuration core linked list and >> * retrieves the device if it exists. >> */ >> -static struct virtio_net * >> +struct virtio_net * >> get_device(struct vhost_device_ctx ctx) >> { >> struct virtio_net_config_ll *ll_dev; >> @@ -256,6 +258,11 @@ init_device(struct virtio_net *dev) >> memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue)); >> memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue)); >> >> + dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1; >> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1; >> + dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1; >> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1; >> + >> /* Backends are set to -1 indicating an inactive device. 
*/ >> dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED; >> dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED; >> @@ -455,12 +462,6 @@ set_mem_table(struct vhost_device_ctx ctx, >> if (dev == NULL) >> return -1; >> >> - if (dev->mem) { >> - munmap((void *)(uintptr_t)dev->mem->mapped_address, >> - (size_t)dev->mem->mapped_size); >> - free(dev->mem); >> - } >> - >> /* Malloc the memory structure depending on the number of regions. */ >> mem = calloc(1, sizeof(struct virtio_memory) + >> (sizeof(struct virtio_memory_regions) * nregions)); >> @@ -624,7 +625,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct >> vhost_vring_file *file) >> /* file->index refers to the queue index. The txq is 1, rxq is 0. */ >> vq = dev->virtqueue[file->index]; >> >> - if (vq->kickfd) >> + if ((int)vq->kickfd >= 0) >> close((int)vq->kickfd); >> >> vq->kickfd = file->fd; >> @@ -650,8 +651,9 @@ set_vring_kick(struct vhost_device_ctx ctx, struct >> vhost_vring_file *file) >> /* file->index refers to the queue index. The txq is 1, rxq is 0. */ >> vq = dev->virtqueue[file->index]; >> >> - if (vq->callfd) >> + if ((int)vq->callfd >= 0) >> close((int)vq->callfd); >> + >> vq->callfd = file->fd; >> >> return 0; >> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h >> new file mode 100644 >> index 0000000..75fb57e >> --- /dev/null >> +++ b/lib/librte_vhost/virtio-net.h >> @@ -0,0 +1,43 @@ >> +/*- >> + * BSD LICENSE >> + * >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * >> + * * Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. 
>> + * * Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in >> + * the documentation and/or other materials provided with the >> + * distribution. >> + * * Neither the name of Intel Corporation nor the names of its >> + * contributors may be used to endorse or promote products derived >> + * from this software without specific prior written permission. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> + >> +#ifndef _VIRTIO_NET_H >> +#define _VIRTIO_NET_H >> + >> +#include "vhost-net.h" >> +#include "rte_virtio_net.h" >> + >> +struct virtio_net_device_ops const *notify_ops; >> +struct virtio_net *get_device(struct vhost_device_ctx ctx); >> + >> +#endif >> -- >> 1.8.1.4 >>