On Wed, Apr 14, 2021 at 08:26:07PM +0800, Hangbin Liu wrote:
[ ... ]

> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index aa516472ce46..3980fb3bfb09 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -57,6 +57,7 @@ struct xdp_dev_bulk_queue {
>       struct list_head flush_node;
>       struct net_device *dev;
>       struct net_device *dev_rx;
> +     struct bpf_prog *xdp_prog;
>       unsigned int count;
>  };
>  
> @@ -326,22 +327,71 @@ bool dev_map_can_have_prog(struct bpf_map *map)
>       return false;
>  }
>  
> +static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
> +                             struct xdp_frame **frames, int n,
> +                             struct net_device *dev)
> +{
> +     struct xdp_txq_info txq = { .dev = dev };
> +     struct xdp_buff xdp;
> +     int i, nframes = 0;
> +
> +     for (i = 0; i < n; i++) {
> +             struct xdp_frame *xdpf = frames[i];
> +             u32 act;
> +             int err;
> +
> +             xdp_convert_frame_to_buff(xdpf, &xdp);
> +             xdp.txq = &txq;
> +
> +             act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +             switch (act) {
> +             case XDP_PASS:
> +                     err = xdp_update_frame_from_buff(&xdp, xdpf);
> +                     if (unlikely(err < 0))
> +                             xdp_return_frame_rx_napi(xdpf);
> +                     else
> +                             frames[nframes++] = xdpf;
> +                     break;
> +             default:
> +                     bpf_warn_invalid_xdp_action(act);
> +                     fallthrough;
> +             case XDP_ABORTED:
> +                     trace_xdp_exception(dev, xdp_prog, act);
> +                     fallthrough;
> +             case XDP_DROP:
> +                     xdp_return_frame_rx_napi(xdpf);
> +                     break;
> +             }
> +     }
> +     return nframes; /* sent frames count */
> +}
> +
>  static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
>  {
>       struct net_device *dev = bq->dev;
> -     int sent = 0, err = 0;
> +     int sent = 0, drops = 0, err = 0;
> +     unsigned int cnt = bq->count;
> +     int to_send = cnt;
>       int i;
>  
> -     if (unlikely(!bq->count))
> +     if (unlikely(!cnt))
>               return;
>  
> -     for (i = 0; i < bq->count; i++) {
> +     for (i = 0; i < cnt; i++) {
>               struct xdp_frame *xdpf = bq->q[i];
>  
>               prefetch(xdpf);
>       }
>  
> -     sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
> +     if (bq->xdp_prog) {
bq->xdp_prog is used here

> +             to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
> +             if (!to_send)
> +                     goto out;
> +
> +             drops = cnt - to_send;
> +     }
> +

[ ... ]

>  static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
> -                    struct net_device *dev_rx)
> +                    struct net_device *dev_rx, struct bpf_prog *xdp_prog)
>  {
>       struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
>       struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
> @@ -412,18 +466,22 @@ static void bq_enqueue(struct net_device *dev, struct 
> xdp_frame *xdpf,
>       /* Ingress dev_rx will be the same for all xdp_frame's in
>        * bulk_queue, because bq stored per-CPU and must be flushed
>        * from net_device drivers NAPI func end.
> +      *
> +      * Do the same with xdp_prog and flush_list since these fields
> +      * are only ever modified together.
>        */
> -     if (!bq->dev_rx)
> +     if (!bq->dev_rx) {
>               bq->dev_rx = dev_rx;
> +             bq->xdp_prog = xdp_prog;
bq->xdp_prog is assigned here and could be used later in bq_xmit_all().
How is bq->xdp_prog protected? Are the assignment here and the later use
both under one rcu_read_lock()?
It is not very obvious after taking a quick look at xdp_do_flush[_map].

e.g. what if the devmap elem gets deleted?

[ ... ]

>  static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
> -                            struct net_device *dev_rx)
> +                             struct net_device *dev_rx,
> +                             struct bpf_prog *xdp_prog)
>  {
>       struct xdp_frame *xdpf;
>       int err;
> @@ -439,42 +497,14 @@ static inline int __xdp_enqueue(struct net_device *dev, 
> struct xdp_buff *xdp,
>       if (unlikely(!xdpf))
>               return -EOVERFLOW;
>  
> -     bq_enqueue(dev, xdpf, dev_rx);
> +     bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
>       return 0;
>  }
>  
[ ... ]

> @@ -482,12 +512,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct 
> xdp_buff *xdp,
>  {
>       struct net_device *dev = dst->dev;
>  
> -     if (dst->xdp_prog) {
> -             xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
> -             if (!xdp)
> -                     return 0;
> -     }
> -     return __xdp_enqueue(dev, xdp, dev_rx);
> +     return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
>  }

Reply via email to