On Fri, May 01, 2020 at 01:12:17PM +0300, Denis Kirjanov wrote: > The patch adds a basic XDP processing to xen-netfront driver. > > We ran an XDP program for an RX response received from netback > driver. Also we request xen-netback to adjust data offset for > bpf_xdp_adjust_head() header space for custom headers. > > synchronization between frontend and backend parts is done > by using xenbus state switching: > Reconfiguring -> Reconfigured- > Connected > > UDP packets drop rate using xdp program is around 310 kpps > using ./pktgen_sample04_many_flows.sh and 160 kpps without the patch. > > v6: > - added the missing SOB line > - fixed subject > > v5: > - split netfront/netback changes > - added a sync point between backend/frontend on switching to XDP > - added pagepool API > > v4: > - added verbose patch descriprion > - don't expose the XDP headroom offset to the domU guest > - add a modparam to netback to toggle XDP offset > - don't process jumbo frames for now > > v3: > - added XDP_TX support (tested with xdping echoserver) > - added XDP_REDIRECT support (tested with modified xdp_redirect_kern) > - moved xdp negotiation to xen-netback > > v2: > - avoid data copying while passing to XDP > - tell xen-netback that we need the headroom space > > Signed-off-by: Denis Kirjanov <k...@linux-powerpc.org> > --- > drivers/net/xen-netfront.c | 302 > ++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 298 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > index 482c6c8..e7e2c11 100644 > --- a/drivers/net/xen-netfront.c > +++ b/drivers/net/xen-netfront.c > @@ -44,6 +44,9 @@ > #include <linux/mm.h> > #include <linux/slab.h> > #include <net/ip.h> > +#include <linux/bpf.h> > +#include <net/page_pool.h> > +#include <linux/bpf_trace.h> > > #include <xen/xen.h> > #include <xen/xenbus.h> > @@ -102,6 +105,8 @@ struct netfront_queue { > char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */ > struct netfront_info *info; > > + struct bpf_prog __rcu *xdp_prog; > + > struct napi_struct napi; > > /* Split event channels support, tx_* == rx_* when using > @@ -144,6 +149,9 @@ struct netfront_queue { > struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; > grant_ref_t gref_rx_head; > grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; > + > + struct page_pool *page_pool; > + struct xdp_rxq_info xdp_rxq; > }; > > struct netfront_info { > @@ -159,6 +167,8 @@ struct netfront_info { > struct netfront_stats __percpu *rx_stats; > struct netfront_stats __percpu *tx_stats; > > + bool netback_has_xdp_headroom; > + > atomic_t rx_gso_checksum_fixup; > }; > > @@ -167,6 +177,9 @@ struct netfront_rx_info { > struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; > }; > > +static int xennet_xdp_xmit(struct net_device *dev, int n, > + struct xdp_frame **frames, u32 flags); > + > static void skb_entry_set_link(union skb_entry *list, unsigned short id) > { > list->link = id; > @@ -265,8 +278,9 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct > netfront_queue *queue) > if (unlikely(!skb)) > return NULL; > > - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); > - if (!page) { > + page = page_pool_alloc_pages(queue->page_pool, > + GFP_ATOMIC | __GFP_NOWARN);
You can use page_pool_dev_alloc_pages(), which is called with the exact same arguments. > + if (unlikely(!page)) { > kfree_skb(skb); > return NULL; > } > @@ -778,6 +792,53 @@ static int xennet_get_extras(struct netfront_queue > *queue, > return err; > } > > +u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata, > + struct xen_netif_rx_response *rx, struct bpf_prog *prog, > + struct xdp_buff *xdp) > +{ > + struct xdp_frame *xdpf; > + u32 len = rx->status; > + u32 act = XDP_PASS; > + int err; > + > + xdp->data_hard_start = page_address(pdata); > + xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; > + xdp_set_data_meta_invalid(xdp); > + xdp->data_end = xdp->data + len; > + xdp->rxq = &queue->xdp_rxq; > + xdp->handle = 0; > + > + act = bpf_prog_run_xdp(prog, xdp); > + switch (act) { > + case XDP_TX: > + get_page(pdata); > + xdpf = convert_to_xdp_frame(xdp); > + err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0); > + if (unlikely(err < 0)) > + trace_xdp_exception(queue->info->netdev, prog, act); > + break; > + case XDP_REDIRECT: > + get_page(pdata); > + err = xdp_do_redirect(queue->info->netdev, xdp, prog); > + if (unlikely(err)) > + trace_xdp_exception(queue->info->netdev, prog, act); > + xdp_do_flush(); > + break; > + case XDP_PASS: > + case XDP_DROP: > + break; > + > + case XDP_ABORTED: > + trace_xdp_exception(queue->info->netdev, prog, act); > + break; > + > + default: > + bpf_warn_invalid_xdp_action(act); > + } > + > + return act; > +} > + > static int xennet_get_responses(struct netfront_queue *queue, > struct netfront_rx_info *rinfo, RING_IDX rp, > struct sk_buff_head *list) > @@ -792,6 +853,9 @@ static int xennet_get_responses(struct netfront_queue > *queue, > int slots = 1; > int err = 0; > unsigned long ret; > + struct bpf_prog *xdp_prog; > + struct xdp_buff xdp; > + u32 verdict; > > if (rx->flags & XEN_NETRXF_extra_info) { > err = xennet_get_extras(queue, extras, rp); > @@ -827,9 +891,20 @@ static int xennet_get_responses(struct netfront_queue > *queue, > > gnttab_release_grant_reference(&queue->gref_rx_head, ref); > > - __skb_queue_tail(list, skb); > - > + rcu_read_lock(); > + xdp_prog = rcu_dereference(queue->xdp_prog); > + if (xdp_prog && !(rx->flags & XEN_NETRXF_more_data)) { > + /* currently only a single page contains data */ > + WARN_ON_ONCE(skb_shinfo(skb)->nr_frags != 1); > + verdict = xennet_run_xdp(queue, > + > skb_frag_page(&skb_shinfo(skb)->frags[0]), > + rx, xdp_prog, &xdp); > + if (verdict != XDP_PASS) > + err = -EINVAL; > + } > + rcu_read_unlock(); > next: > + __skb_queue_tail(list, skb); > if (!(rx->flags & XEN_NETRXF_more_data)) > break; > > @@ -997,6 +1072,7 @@ static int xennet_poll(struct napi_struct *napi, int > budget) > struct sk_buff_head rxq; > struct sk_buff_head errq; > struct sk_buff_head tmpq; > + struct bpf_prog *xdp_prog; > int err; > > spin_lock(&queue->rx_lock); > @@ -1014,6 +1090,12 @@ static int xennet_poll(struct napi_struct *napi, int > budget) > memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx)); > memset(extras, 0, sizeof(rinfo.extras)); > > + rcu_read_lock(); > + xdp_prog = rcu_dereference(queue->xdp_prog); > + if (xdp_prog) > + rx->offset = XDP_PACKET_HEADROOM; > + rcu_read_unlock(); > + > err = xennet_get_responses(queue, &rinfo, rp, &tmpq); > > if (unlikely(err)) { > @@ -1261,6 +1343,156 @@ static void xennet_poll_controller(struct net_device > *dev) > } > #endif > > +#define NETBACK_XDP_HEADROOM_DISABLE 0 > +#define NETBACK_XDP_HEADROOM_ENABLE 1 > + > +static int talk_to_netback_xdp(struct netfront_info *np, int xdp) > +{ > + int err; > + > + err = xenbus_printf(XBT_NIL, np->xbdev->nodename, > + "feature-xdp", "%u", xdp); > + if (err) > + pr_debug("Error writing feature-xdp\n"); > + > + return err; > +} > + > +static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog, > + struct netlink_ext_ack *extack) > +{ > + struct netfront_info *np = netdev_priv(dev); > + struct bpf_prog *old_prog; > + unsigned int i, err; > + unsigned long int max_mtu = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM; > + > + if (dev->mtu > max_mtu) { > + netdev_warn(dev, "XDP requires MTU less than %lu\n", max_mtu); > + return -EINVAL; > + } > + > + if (!np->netback_has_xdp_headroom) > + return 0; > + > + old_prog = rtnl_dereference(np->queues[0].xdp_prog); > + if (!old_prog && !prog) > + return 0; > + > + if (prog) > + bpf_prog_add(prog, dev->real_num_tx_queues); > + > + for (i = 0; i < dev->real_num_tx_queues; ++i) > + rcu_assign_pointer(np->queues[i].xdp_prog, prog); > + > + if (old_prog) > + for (i = 0; i < dev->real_num_tx_queues; ++i) > + bpf_prog_put(old_prog); > + > + xenbus_switch_state(np->xbdev, XenbusStateReconfiguring); > + > + err = talk_to_netback_xdp(np, prog ? NETBACK_XDP_HEADROOM_ENABLE: > + NETBACK_XDP_HEADROOM_DISABLE); > + if (err) > + return err; > + > + /* avoid race with XDP headroom adjustment */ > + wait_event(module_wq, > + xenbus_read_driver_state(np->xbdev->otherend) == > + XenbusStateReconfigured); > + xenbus_switch_state(np->xbdev, XenbusStateConnected); > + > + return 0; > +} > + > +static u32 xennet_xdp_query(struct net_device *dev) > +{ > + struct netfront_info *np = netdev_priv(dev); > + unsigned int num_queues = dev->real_num_tx_queues; > + unsigned int i; > + struct netfront_queue *queue; > + const struct bpf_prog *xdp_prog; > + > + for (i = 0; i < num_queues; ++i) { > + queue = &np->queues[i]; > + xdp_prog = rtnl_dereference(queue->xdp_prog); > + if (xdp_prog) > + return xdp_prog->aux->id; > + } > + > + return 0; > +} > + > +static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp) > +{ > + switch (xdp->command) { > + case XDP_SETUP_PROG: > + return xennet_xdp_set(dev, xdp->prog, xdp->extack); > + case XDP_QUERY_PROG: > + xdp->prog_id = xennet_xdp_query(dev); > + return 0; > + default: > + return -EINVAL; > + } > +} > + > +static int xennet_xdp_xmit_one(struct net_device *dev, struct xdp_frame > *xdpf) > +{ > + struct netfront_info *np = netdev_priv(dev); > + struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats); > + struct netfront_queue *queue = NULL; > + unsigned int num_queues = dev->real_num_tx_queues; > + unsigned long flags; > + int notify; > + struct xen_netif_tx_request *tx; > + > + queue = &np->queues[smp_processor_id() % num_queues]; > + > + spin_lock_irqsave(&queue->tx_lock, flags); > + > + tx = xennet_make_first_txreq(queue, NULL, > + virt_to_page(xdpf->data), > + offset_in_page(xdpf->data), > + xdpf->len); > + > + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify); > + if (notify) > + notify_remote_via_irq(queue->tx_irq); > + > + u64_stats_update_begin(&tx_stats->syncp); > + tx_stats->bytes += xdpf->len; > + tx_stats->packets++; > + u64_stats_update_end(&tx_stats->syncp); > + > + xennet_tx_buf_gc(queue); > + > + spin_unlock_irqrestore(&queue->tx_lock, flags); > + return 0; > +} > + > +static int xennet_xdp_xmit(struct net_device *dev, int n, > + struct xdp_frame **frames, u32 flags) > +{ > + int drops = 0; > + int i, err; > + > + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) > + return -EINVAL; > + > + for (i = 0; i < n; i++) { > + struct xdp_frame *xdpf = frames[i]; > + > + if (!xdpf) > + continue; > + err = xennet_xdp_xmit_one(dev, xdpf); > + if (err) { > + xdp_return_frame_rx_napi(xdpf); > + drops++; > + } > + } > + > + return n - drops; > +} > + > static const struct net_device_ops xennet_netdev_ops = { > .ndo_open = xennet_open, > .ndo_stop = xennet_close, > @@ -1272,6 +1504,8 @@ static void xennet_poll_controller(struct net_device > *dev) > .ndo_fix_features = xennet_fix_features, > .ndo_set_features = xennet_set_features, > .ndo_select_queue = xennet_select_queue, > + .ndo_bpf = xennet_xdp, > + .ndo_xdp_xmit = xennet_xdp_xmit, > #ifdef CONFIG_NET_POLL_CONTROLLER > .ndo_poll_controller = xennet_poll_controller, > #endif > @@ -1419,6 +1653,8 @@ static void xennet_disconnect_backend(struct > netfront_info *info) > queue->rx_ring_ref = GRANT_INVALID_REF; > queue->tx.sring = NULL; > queue->rx.sring = NULL; > + > + page_pool_destroy(queue->page_pool); > } > } > > @@ -1754,6 +1990,51 @@ static void xennet_destroy_queues(struct netfront_info > *info) > info->queues = NULL; > } > > + > + > +static int xennet_create_page_pool(struct netfront_queue *queue) > +{ > + int err; > + struct page_pool_params pp_params = { > + .order = 0, > + .flags = 0, > + .pool_size = NET_RX_RING_SIZE, > + .nid = NUMA_NO_NODE, > + .dev = &queue->info->netdev->dev, > + .offset = XDP_PACKET_HEADROOM, > + .max_len = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM, > + }; > + > + queue->page_pool = page_pool_create(&pp_params); > + if (IS_ERR(queue->page_pool)) { > + err = PTR_ERR(queue->page_pool); > + queue->page_pool = NULL; > + return err; > + } > + > + err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->info->netdev, > + queue->id); > + if (err) { > + netdev_err(queue->info->netdev, "xdp_rxq_info_reg failed\n"); > + goto err_free_pp; > + } > + > + err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq, > + MEM_TYPE_PAGE_ORDER0, NULL); > + if (err) { > + netdev_err(queue->info->netdev, "xdp_rxq_info_reg_mem_model > failed\n"); > + goto err_unregister_rxq; > + } > + return 0; > + > +err_unregister_rxq: > + xdp_rxq_info_unreg(&queue->xdp_rxq); > +err_free_pp: > + page_pool_destroy(queue->page_pool); > + queue->page_pool = NULL; > + return err; > +} > + > static int xennet_create_queues(struct netfront_info *info, > unsigned int *num_queues) > { > @@ -1779,6 +2060,14 @@ static int xennet_create_queues(struct netfront_info > *info, > break; > } > > + /* use page pool recycling instead of buddy allocator */ > + ret = xennet_create_page_pool(queue); > + if (ret < 0) { > + dev_err(&info->xbdev->dev, "can't allocate page > pool\n"); > + *num_queues = i; > + return ret; > + } > + > netif_napi_add(queue->info->netdev, &queue->napi, > xennet_poll, 64); > if (netif_running(info->netdev)) > @@ -1825,6 +2114,8 @@ static int talk_to_netback(struct xenbus_device *dev, > goto out_unlocked; > } > > + info->netback_has_xdp_headroom = > xenbus_read_unsigned(info->xbdev->otherend, > + > "feature-xdp-headroom", 0); > rtnl_lock(); > if (info->queues) > xennet_destroy_queues(info); > @@ -1959,6 +2250,8 @@ static int xennet_connect(struct net_device *dev) > err = talk_to_netback(np->xbdev, np); > if (err) > return err; > + if (np->netback_has_xdp_headroom) > + pr_info("backend supports XDP headroom\n"); > > /* talk_to_netback() sets the correct number of queues */ > num_queues = dev->real_num_tx_queues; > @@ -2020,6 +2313,7 @@ static void netback_changed(struct xenbus_device *dev, > case XenbusStateInitialised: > case XenbusStateReconfiguring: > case XenbusStateReconfigured: > + break; > case XenbusStateUnknown: > break; > > -- > 1.8.3.1 >