On Fri, Mar 23, 2018 at 5:18 AM, Jesper Dangaard Brouer <bro...@redhat.com> wrote:
> Use the IDA infrastructure for getting a cyclically increasing ID number,
> which is used for keeping track of each registered allocator per
> RX-queue xdp_rxq_info. Instead of using the IDR infrastructure, which
> uses a radix tree, use a dynamic rhashtable for the ID-to-pointer
> lookup table, because this is faster.
>
> The problem being solved here is that the xdp_rxq_info pointer
> (stored in xdp_buff) cannot be used directly, as its guaranteed
> lifetime is too short. The info is needed on a (potentially) remote
> CPU during DMA-TX completion time. The xdp_mem_info is stored in the
> xdp_frame when it is converted from an xdp_buff, which is sufficient
> for the simple page-refcnt based recycle schemes.
>
> For more advanced allocators there is a need to store a pointer to the
> registered allocator. Thus, there is a need to guard the lifetime or
> validity of the allocator pointer, which is done through this
> rhashtable mapping IDs to pointers. The removal and validity of the
> allocator and helper struct xdp_mem_allocator is guarded by RCU. The
> allocator will be created by the driver, and registered with
> xdp_rxq_info_reg_mem_model().
>
> It is up for debate who is responsible for freeing the allocator
> pointer or invoking the allocator destructor function. In any case,
> this must happen via RCU freeing.
>
> V4: Per request of Jason Wang
>  - Use xdp_rxq_info_reg_mem_model() in all drivers implementing
>    XDP_REDIRECT, even though it's not strictly necessary when
>    allocator==NULL for type MEM_TYPE_PAGE_SHARED (given it's zero).
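To make sure I follow the lifetime scheme described above, the driver-side
flow would be roughly the sketch below. This is only my reading of the
patch, not part of it; my_driver_setup_rxq is a made-up helper name, and
the comments about a future allocator-backed type are hypothetical:

	/* Sketch of the driver-side registration flow, as I read the
	 * commit message. Only MEM_TYPE_PAGE_SHARED/MEM_TYPE_PAGE_ORDER0
	 * exist after this patch; allocator-backed types are future work.
	 */
	static int my_driver_setup_rxq(struct xdp_rxq_info *rxq,
				       struct net_device *netdev,
				       u32 queue_index)
	{
		int err;

		err = xdp_rxq_info_reg(rxq, netdev, queue_index);
		if (err)
			return err;

		/* For the simple page-refcnt schemes, allocator is NULL
		 * and no ID is taken from the IDA; rxq->mem.id stays 0.
		 * A future advanced type would pass its allocator pointer
		 * here, which gets an IDA-assigned ID and an rhashtable
		 * entry, so the ID travelling in xdp_frame's xdp_mem_info
		 * can be mapped back to the allocator under RCU on a
		 * remote CPU at DMA-TX completion.
		 */
		err = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_SHARED,
						 NULL);
		if (err) {
			xdp_rxq_info_unreg(rxq);
			return err;
		}
		return 0;
	}

Teardown is then just xdp_rxq_info_unreg(), which removes any ID from the
table and frees the xdp_mem_allocator entry via call_rcu().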
>
> Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    9 +
>  drivers/net/tun.c                             |    6 +
>  drivers/net/virtio_net.c                      |    7 +
>  include/net/xdp.h                             |   15 --
>  net/core/xdp.c                                |  230 ++++++++++++++++++++++++-
>  5 files changed, 248 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index 45520eb503ee..ff069597fccf 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -6360,7 +6360,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
>  	struct device *dev = rx_ring->dev;
>  	int orig_node = dev_to_node(dev);
>  	int ring_node = -1;
> -	int size;
> +	int size, err;
>  
>  	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
>  
> @@ -6397,6 +6397,13 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
>  			     rx_ring->queue_index) < 0)
>  		goto err;
>  
> +	err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
> +					 MEM_TYPE_PAGE_SHARED, NULL);
> +	if (err) {
> +		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
> +		goto err;
> +	}
> +
>  	rx_ring->xdp_prog = adapter->xdp_prog;
>  
>  	return 0;
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 6750980d9f30..81fddf9cc58f 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -846,6 +846,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
>  				       tun->dev, tfile->queue_index);
>  		if (err < 0)
>  			goto out;
> +		err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
> +						 MEM_TYPE_PAGE_SHARED, NULL);
> +		if (err < 0) {
> +			xdp_rxq_info_unreg(&tfile->xdp_rxq);
> +			goto out;
> +		}
>  		err = 0;
>  	}
>  
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 6c4220450506..48c86accd3b8 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1312,6 +1312,13 @@ static int virtnet_open(struct net_device *dev)
>  		if (err < 0)
>  			return err;
>  
> +		err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
> +						 MEM_TYPE_PAGE_SHARED, NULL);
> +		if (err < 0) {
> +			xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
> +			return err;
> +		}
> +
>  		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
>  		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
>  	}
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index bc0cb97e20dc..859aa9b737fe 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -41,7 +41,7 @@ enum mem_type {
>  
>  struct xdp_mem_info {
>  	u32 type; /* enum mem_type, but known size type */
> -	/* u32 id; will be added later in this patchset */
> +	u32 id;
>  };
>  
>  struct xdp_rxq_info {
> @@ -100,18 +100,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
>  	return xdp_frame;
>  }
>  
> -static inline
> -void xdp_return_frame(void *data, struct xdp_mem_info *mem)
> -{
> -	if (mem->type == MEM_TYPE_PAGE_SHARED)
> -		page_frag_free(data);
> -
> -	if (mem->type == MEM_TYPE_PAGE_ORDER0) {
> -		struct page *page = virt_to_page(data); /* Assumes order0 page*/
> -
> -		put_page(page);
> -	}
> -}
> +void xdp_return_frame(void *data, struct xdp_mem_info *mem);
>  
>  int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
>  		     struct net_device *dev, u32 queue_index);
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 9eee0c431126..06a5b39491ad 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -5,6 +5,9 @@
>   */
>  #include <linux/types.h>
>  #include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/idr.h>
> +#include <linux/rhashtable.h>
>  
>  #include <net/xdp.h>
>  
> @@ -13,6 +16,99 @@
>  #define REG_STATE_UNREGISTERED	0x2
>  #define REG_STATE_UNUSED	0x3
>  
> +DEFINE_IDA(mem_id_pool);
> +static DEFINE_MUTEX(mem_id_lock);
> +#define MEM_ID_MAX 0xFFFE
> +#define MEM_ID_MIN 1
> +static int mem_id_next = MEM_ID_MIN;
> +
> +static bool mem_id_init; /* false */
> +static struct rhashtable *mem_id_ht;
> +
> +struct xdp_mem_allocator {
> +	struct xdp_mem_info mem;
> +	void *allocator;
> +	struct rhash_head node;
> +	struct rcu_head rcu;
> +};
> +
> +static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
> +{
> +	const u32 *k = data;
> +	const u32 key = *k;
> +
> +	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
> +		     != sizeof(u32));
> +
> +	/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
> +	return key << RHT_HASH_RESERVED_SPACE;
> +}
> +
> +static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
> +			  const void *ptr)
> +{
> +	const struct xdp_mem_allocator *xa = ptr;
> +	u32 mem_id = *(u32 *)arg->key;
> +
> +	return xa->mem.id != mem_id;
> +}
> +
> +static const struct rhashtable_params mem_id_rht_params = {
> +	.nelem_hint = 64,
> +	.head_offset = offsetof(struct xdp_mem_allocator, node),
> +	.key_offset = offsetof(struct xdp_mem_allocator, mem.id),
> +	.key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
> +	.max_size = MEM_ID_MAX,
> +	.min_size = 8,
> +	.automatic_shrinking = true,
> +	.hashfn = xdp_mem_id_hashfn,
> +	.obj_cmpfn = xdp_mem_id_cmp,
> +};
> +
> +void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
> +{
> +	struct xdp_mem_allocator *xa;
> +
> +	xa = container_of(rcu, struct xdp_mem_allocator, rcu);
> +
> +	/* Allow this ID to be reused */
> +	ida_simple_remove(&mem_id_pool, xa->mem.id);
> +
> +	/* TODO: Depending on allocator type/pointer free resources */
> +
> +	/* Poison memory */
> +	xa->mem.id = 0xFFFF;
> +	xa->mem.type = 0xF0F0;
> +	xa->allocator = (void *)0xDEAD9001;
> +
> +	kfree(xa);
> +}
> +
> +void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
> +{
> +	struct xdp_mem_allocator *xa;
> +	int id = xdp_rxq->mem.id;
> +	int err;
> +
> +	if (id == 0)
> +		return;
> +
> +	mutex_lock(&mem_id_lock);
> +
> +	xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
> +	if (!xa) {
> +		mutex_unlock(&mem_id_lock);
> +		return;
> +	}
> +
> +	err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
> +	WARN_ON(err);
> +
> +	call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
> +
> +	mutex_unlock(&mem_id_lock);
> +}
> +
>  void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
>  {
>  	/* Simplify driver cleanup code paths, allow unreg "unused" */
> @@ -21,8 +117,14 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
>  
>  	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
>  
> +	__xdp_rxq_info_unreg_mem_model(xdp_rxq);
> +
>  	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
>  	xdp_rxq->dev = NULL;
> +
> +	/* Reset mem info to defaults */
> +	xdp_rxq->mem.id = 0;
> +	xdp_rxq->mem.type = 0;
>  }
>  EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
>  
> @@ -72,20 +174,138 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
>  }
>  EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
>  
> +int __mem_id_init_hash_table(void)
> +{
> +	struct rhashtable *rht;
> +	int ret;
> +
> +	if (unlikely(mem_id_init))
> +		return 0;
> +
> +	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
> +	if (!rht)
> +		return -ENOMEM;
> +
> +	ret = rhashtable_init(rht, &mem_id_rht_params);
> +	if (ret < 0) {
> +		kfree(rht);
> +		return ret;
> +	}
> +	mem_id_ht = rht;
> +	smp_mb(); /* mutex lock should provide enough pairing */
> +
> +	mem_id_init = true;
> +
> +	return 0;
> +}
> +
> +/* Allocate a cyclic ID that maps to allocator pointer.
> + * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
> + *
> + * Caller must lock mem_id_lock.
> + */
> +static int __mem_id_cyclic_get(gfp_t gfp)
> +{
> +	int retries = 1;
> +	int id;
> +
> +again:
> +	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
> +	if (id < 0) {
> +		if (id == -ENOSPC) {
> +			/* Cyclic allocator, reset next id */
> +			if (retries--) {
> +				mem_id_next = MEM_ID_MIN;
> +				goto again;
> +			}
> +		}
> +		return id; /* errno */
> +	}
> +	mem_id_next = id + 1;
> +
> +	return id;
> +}
> +
>  int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>  			       enum mem_type type, void *allocator)
>  {
> +	struct xdp_mem_allocator *xdp_alloc;
> +	gfp_t gfp = GFP_KERNEL;
> +	int id, errno, ret;
> +	void *ptr;
> +
> +	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
> +		WARN(1, "Missing register, driver bug");
> +		return -EFAULT;
> +	}
> +
>  	if (type >= MEM_TYPE_MAX)
>  		return -EINVAL;
>  
>  	xdp_rxq->mem.type = type;
>  
> -	if (allocator)
> -		return -EOPNOTSUPP;
> +	if (!allocator)
> +		return 0;
> +
> +	/* Delay init of rhashtable to save memory if feature isn't used */
> +	if (!mem_id_init) {
> +		mutex_lock(&mem_id_lock);
> +		ret = __mem_id_init_hash_table();
> +		mutex_unlock(&mem_id_lock);
> +		if (ret < 0) {
> +			WARN_ON(1);
> +			return ret;
> +		}
> +	}
> +
> +	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
> +	if (!xdp_alloc)
> +		return -ENOMEM;
> +
> +	mutex_lock(&mem_id_lock);
> +	id = __mem_id_cyclic_get(gfp);
> +	if (id < 0) {
> +		errno = id;
> +		goto err;
> +	}
> +	xdp_rxq->mem.id = id;
> +	xdp_alloc->mem = xdp_rxq->mem;
> +	xdp_alloc->allocator = allocator;
> +
> +	/* Insert allocator into ID lookup table */
> +	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
> +	if (IS_ERR(ptr)) {
> +		errno = PTR_ERR(ptr);
> +		goto err;
> +	}
> +
> +	mutex_unlock(&mem_id_lock);
>  
> -	/* TODO: Allocate an ID that maps to allocator pointer
> -	 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
> -	 */
>  	return 0;
> +err:
> +	mutex_unlock(&mem_id_lock);
> +	kfree(xdp_alloc);
> +	return errno;
>  }
>  EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
> +
> +void xdp_return_frame(void *data, struct xdp_mem_info *mem)
> +{
> +	struct xdp_mem_allocator *xa;
> +
> +	rcu_read_lock();
> +	if (mem->id)
> +		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
> +	rcu_read_unlock();
> +
> +	if (mem->type == MEM_TYPE_PAGE_SHARED) {
> +		page_frag_free(data);
> +		return;
> +	}
> +
> +	if (mem->type == MEM_TYPE_PAGE_ORDER0) {
> +		struct page *page = virt_to_page(data); /* Assumes order0 page*/
> +
> +		put_page(page);
> +	}
> +}
> +EXPORT_SYMBOL_GPL(xdp_return_frame);
>
I'm not sure what the point is of looking up the xa value if it is never used. Also, I would assume there are memory types that won't even need the hash table lookup. I would prefer to see this bit held off until you have something that actually needs it.
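Concretely (untested, just to illustrate what I mean), something along
these lines would keep the existing fast paths free of the lookup, and
defer the rhashtable access to whatever future type actually consumes
the allocator pointer:

	void xdp_return_frame(void *data, struct xdp_mem_info *mem)
	{
		/* Simple refcnt-based types never need the allocator */
		if (mem->type == MEM_TYPE_PAGE_SHARED) {
			page_frag_free(data);
			return;
		}

		if (mem->type == MEM_TYPE_PAGE_ORDER0) {
			put_page(virt_to_page(data)); /* Assumes order0 page */
			return;
		}

		/* Only a hypothetical future allocator-backed type would
		 * pay for the lookup, and would actually use the result:
		 *
		 *	rcu_read_lock();
		 *	xa = rhashtable_lookup(mem_id_ht, &mem->id,
		 *			       mem_id_rht_params);
		 *	if (xa)
		 *		... return data to xa->allocator ...
		 *	rcu_read_unlock();
		 */
	}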