On 10/31/2024 1:50 PM, Gur Stavi wrote:
> net_af_packet PMD multi "queue" support relies on Linux FANOUT capability.
> Linux FANOUT is a SW based load balancer that is similar to HW RSS which
> is more common for DPDK PMDs. Instead of multiple HW descriptor queues,
> AF PACKET uses multiple sockets.
> HW RSS will typically drop a packet if its selected RX queue is empty.
> However, Linux FANOUT, as a SW load balancer, can be configured to avoid
> this packet drop by rolling over to the next socket.
> This rollover functionality was ALWAYS enabled in net_af_packet. It is
> surrounded by ifdef, but only to allow compilation on ancient Linux
> versions that did not have it.
> 
> Since DPDK applications are usually designed for HW based PMDs, this
> rollover functionality, which the developers are likely unaware of, could
> be confusing.
> 
> Another option that is part of Linux FANOUT is DEFRAG that instructs Linux
> to compose complete IP packet out of fragments before delivering it to the
> PACKET socket. Again, this behavior typically does not exist for HW based
> PMDs and may confuse users.
> 
> This patch adds 2 options to control these features:
> rollover=[0|1],defrag=[0|1]
> For backward compatibility both features are enabled by default even
> though most users will probably want both of them disabled.
> 
> Signed-off-by: Gur Stavi <gur.st...@huawei.com>
> ---
> v2:
> * Improve error handling for parsing of arg strings. Add a wrapper around
>   strtoul to replace atoi.
> * Fix checkpatch indentation error due to copy-paste.
> 
> v1: https://mails.dpdk.org/archives/dev/2024-October/307451.html
> ---
>  doc/guides/nics/af_packet.rst             |  4 ++
>  drivers/net/af_packet/rte_eth_af_packet.c | 77 ++++++++++++++++++-----
>  2 files changed, 65 insertions(+), 16 deletions(-)
> 
> diff --git a/doc/guides/nics/af_packet.rst b/doc/guides/nics/af_packet.rst
> index 66b977e1a2..4bc748b50b 100644
> --- a/doc/guides/nics/af_packet.rst
> +++ b/doc/guides/nics/af_packet.rst
> @@ -29,6 +29,8 @@ Some of these, in turn, will be used to configure the 
> PACKET_MMAP settings.
>  *   ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: 
> multiple
>      of 16B);
>  *   ``framecnt`` - PACKET_MMAP frame count (optional, default 512).
> +*   ``rollover`` - disable(0)/enable(1) PACKET_FANOUT_ROLLOVER (optional, 
> default 1).
> +*   ``defrag`` - disable(0)/enable(1) PACKET_FANOUT_FLAG_DEFRAG (optional, 
> default 1).
> 

+1 to introduce both options, thanks.
Do you have any practical usecase to disable them?


>  Because this implementation is based on PACKET_MMAP, and PACKET_MMAP has its
>  own pre-requisites, it should be noted that the inner workings of PACKET_MMAP
> @@ -47,6 +49,8 @@ For the full details behind PACKET_MMAP's structures and 
> settings, consider
>  reading the `PACKET_MMAP documentation in the Kernel
>  <https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt>`_.
> 
> +For details of ``rollover`` anb ``defrag`` refer to the *packet(7)* man page.
> +
>  Prerequisites
>  -------------
> 
> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c 
> b/drivers/net/af_packet/rte_eth_af_packet.c
> index 68870dd3e2..1a043e8398 100644
> --- a/drivers/net/af_packet/rte_eth_af_packet.c
> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
> @@ -36,6 +36,8 @@
>  #define ETH_AF_PACKET_FRAMESIZE_ARG  "framesz"
>  #define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt"
>  #define ETH_AF_PACKET_QDISC_BYPASS_ARG       "qdisc_bypass"
> +#define ETH_AF_PACKET_ROLLOVER               "rollover"
> +#define ETH_AF_PACKET_DEFRAG         "defrag"
> 

Can you please add new arguments to 'RTE_PMD_REGISTER_PARAM_STRING'
macro below?

>  #define DFLT_FRAME_SIZE              (1 << 11)
>  #define DFLT_FRAME_COUNT     (1 << 9)
> @@ -91,6 +93,8 @@ static const char *valid_arguments[] = {
>       ETH_AF_PACKET_FRAMESIZE_ARG,
>       ETH_AF_PACKET_FRAMECOUNT_ARG,
>       ETH_AF_PACKET_QDISC_BYPASS_ARG,
> +     ETH_AF_PACKET_ROLLOVER,
> +     ETH_AF_PACKET_DEFRAG,
>       NULL
>  };
> 
> @@ -659,6 +663,20 @@ open_packet_iface(const char *key __rte_unused,
>       return 0;
>  }
> 
> +static int
> +uint_arg_parse(const char *arg_str, unsigned int *arg_val)
> +{
> +     unsigned long val;
> +     char *endptr;
> +
> +     val = strtoul(arg_str, &endptr, 10);
> +     if (*arg_str == '\0' || *endptr != '\0')
> +             return -EINVAL;
> +
> +     *arg_val = val;
> +     return 0;
> +}
> +
>  static int
>  rte_pmd_init_internals(struct rte_vdev_device *dev,
>                         const int sockfd,
> @@ -676,6 +694,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>       const unsigned int numa_node = dev->device.numa_node;
>       struct rte_eth_dev_data *data = NULL;
>       struct rte_kvargs_pair *pair = NULL;
> +     const char *ifname = NULL;
>       struct ifreq ifr;
>       size_t ifnamelen;
>       unsigned k_idx;
> @@ -686,6 +705,8 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>       int rc, tpver, discard;
>       int qsockfd = -1;
>       unsigned int i, q, rdsize;
> +     unsigned int rollover = 1;
> +     unsigned int defrag = 1;
>  #if defined(PACKET_FANOUT)
>       int fanout_arg;
>  #endif
> @@ -693,9 +714,30 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>       for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
>               pair = &kvlist->pairs[k_idx];
>               if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL)
> -                     break;
> +                     ifname = pair->value;
> +             if (strcmp(pair->key, ETH_AF_PACKET_ROLLOVER) == 0) {
> +                     rc = uint_arg_parse(pair->value, &rollover);
> +                     if (rc || rollover > 1) {
> +                             PMD_LOG(ERR,
> +                                     "%s: invalid rollover value: %s",
> +                                     name, pair->value);
> +                             return -1;
> +                     }
> +                     continue;
> +             }
> +             if (strcmp(pair->key, ETH_AF_PACKET_DEFRAG) == 0) {
> +                     rc = uint_arg_parse(pair->value, &defrag);
> +                     if (rc || defrag > 1) {
> +                             PMD_LOG(ERR,
> +                                     "%s: invalid defrag value: %s",
> +                                     name, pair->value);
> +                             return -1;
> +                     }
> +                     continue;
> +             }
>

Instead of adding parameter parsing here, they are already in
'rte_eth_from_packet()' function, better to add there,

BUT, before adding more paramter, can you please fix the parameter
parsing in the af_packet PMD? This is in my list for a long time now.

Instead of accessing to 'kvlist' internals (kvlist->count,
kvlist->pairs[]), and use 'strstr' (or 'strcmp'), there is already
existing 'rte_kvargs_process()' API for it, you can find multiple
samples in other drivers.


>       }
> -     if (pair == NULL) {
> +
> +     if (ifname == NULL) {
>               PMD_LOG(ERR,
>                       "%s: no interface specified for AF_PACKET ethdev",
>                       name);
> @@ -738,21 +780,21 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>       req->tp_frame_size = framesize;
>       req->tp_frame_nr = framecnt;
> 
> -     ifnamelen = strlen(pair->value);
> +     ifnamelen = strlen(ifname);
>       if (ifnamelen < sizeof(ifr.ifr_name)) {
> -             memcpy(ifr.ifr_name, pair->value, ifnamelen);
> +             memcpy(ifr.ifr_name, ifname, ifnamelen);
>               ifr.ifr_name[ifnamelen] = '\0';
>       } else {
>               PMD_LOG(ERR,
>                       "%s: I/F name too long (%s)",
> -                     name, pair->value);
> +                     name, ifname);
>               goto free_internals;
>       }
>       if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) {
>               PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name);
>               goto free_internals;
>       }
> -     (*internals)->if_name = strdup(pair->value);
> +     (*internals)->if_name = strdup(ifname);
>       if ((*internals)->if_name == NULL)
>               goto free_internals;
>       (*internals)->if_index = ifr.ifr_ifindex;
> @@ -770,9 +812,12 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
> 
>  #if defined(PACKET_FANOUT)
>       fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
> -     fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
> +     fanout_arg |= PACKET_FANOUT_HASH << 16;
> +     if (defrag)
> +             fanout_arg |= PACKET_FANOUT_FLAG_DEFRAG << 16;
>  #if defined(PACKET_FANOUT_FLAG_ROLLOVER)
> -     fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
> +     if (rollover)
> +             fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
>  #endif
>  #endif
> 
> @@ -792,7 +837,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>               if (rc == -1) {
>                       PMD_LOG_ERRNO(ERR,
>                               "%s: could not set PACKET_VERSION on AF_PACKET 
> socket for %s",
> -                             name, pair->value);
> +                             name, ifname);
>                       goto error;
>               }
> 
> @@ -802,7 +847,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>               if (rc == -1) {
>                       PMD_LOG_ERRNO(ERR,
>                               "%s: could not set PACKET_LOSS on AF_PACKET 
> socket for %s",
> -                             name, pair->value);
> +                             name, ifname);
>                       goto error;
>               }
> 
> @@ -813,7 +858,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>                       if (rc == -1) {
>                               PMD_LOG_ERRNO(ERR,
>                                       "%s: could not set PACKET_QDISC_BYPASS 
> on AF_PACKET socket for %s",
> -                                     name, pair->value);
> +                                     name, ifname);
>                               goto error;
>                       }
>  #endif
> @@ -823,7 +868,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>               if (rc == -1) {
>                       PMD_LOG_ERRNO(ERR,
>                               "%s: could not set PACKET_RX_RING on AF_PACKET 
> socket for %s",
> -                             name, pair->value);
> +                             name, ifname);
>                       goto error;
>               }
> 
> @@ -831,7 +876,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>               if (rc == -1) {
>                       PMD_LOG_ERRNO(ERR,
>                               "%s: could not set PACKET_TX_RING on AF_PACKET "
> -                             "socket for %s", name, pair->value);
> +                             "socket for %s", name, ifname);
>                       goto error;
>               }
> 
> @@ -844,7 +889,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>               if (rx_queue->map == MAP_FAILED) {
>                       PMD_LOG_ERRNO(ERR,
>                               "%s: call to mmap failed on AF_PACKET socket 
> for %s",
> -                             name, pair->value);
> +                             name, ifname);
>                       goto error;
>               }
> 
> @@ -881,7 +926,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>               if (rc == -1) {
>                       PMD_LOG_ERRNO(ERR,
>                               "%s: could not bind AF_PACKET socket to %s",
> -                             name, pair->value);
> +                             name, ifname);
>                       goto error;
>               }
> 
> @@ -891,7 +936,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>               if (rc == -1) {
>                       PMD_LOG_ERRNO(ERR,
>                               "%s: could not set PACKET_FANOUT on AF_PACKET 
> socket for %s",
> -                             name, pair->value);
> +                             name, ifname);
>                       goto error;
>               }
>  #endif
> 
> base-commit: 98613d32e3dac58d685f4f236cf8cc9733abaaf3
> --
> 2.40.1
> 

Reply via email to