On Thu, 19 Mar 2026 23:10:35 +0100
Robin Jarry <[email protected]> wrote:

> Linux TAP devices deliver all packets to userspace regardless of the
> PROMISC/ALLMULTI flags on the interface. Add an opt-in "macfilter"
> devarg that, when enabled, drops received packets whose destination MAC
> does not match any configured unicast or multicast address.
> 
> When macfilter is active the receive path checks the destination MAC
> against the device's unicast address table (managed by the ethdev
> layer), the multicast address list (stored by the driver since the
> ethdev layer does not keep a copy), and accepts broadcast
> unconditionally. Promiscuous and all-multicast modes bypass the
> respective checks.
> 
> To support multiple unicast addresses via rte_eth_dev_mac_addr_add(),
> allocate mac_addrs with rte_zmalloc (TAP_MAX_MAC_ADDRS=16) instead of
> pointing into dev_private, and advertise the new limit in dev_infos_get.
> 
> Dropped packets are reported via per-queue xstats
> (rx_q<N>_mac_filter_drops).
> 
> Signed-off-by: Robin Jarry <[email protected]>
> ---
>  drivers/net/tap/rte_eth_tap.c | 178 ++++++++++++++++++++++++++++++----
>  drivers/net/tap/rte_eth_tap.h |   7 ++
>  2 files changed, 167 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 13e0a23c34a1..f4ea3bc5d160 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -53,11 +53,14 @@
>  #define ETH_TAP_MAC_ARG         "mac"
>  #define ETH_TAP_MAC_FIXED       "fixed"
>  #define ETH_TAP_PERSIST_ARG     "persist"
> +#define ETH_TAP_MAC_FILTER_ARG  "macfilter"
>  
>  #define ETH_TAP_USR_MAC_FMT     "xx:xx:xx:xx:xx:xx"
>  #define ETH_TAP_CMP_MAC_FMT     "0123456789ABCDEFabcdef"
>  #define ETH_TAP_MAC_ARG_FMT     ETH_TAP_MAC_FIXED "|" ETH_TAP_USR_MAC_FMT
>  
> +#define TAP_MAX_MAC_ADDRS    16
> +
>  #define TAP_GSO_MBUFS_PER_CORE       128
>  #define TAP_GSO_MBUF_SEG_SIZE        128
>  #define TAP_GSO_MBUF_CACHE_SIZE      4
> @@ -110,6 +113,7 @@ static const char *valid_arguments[] = {
>       ETH_TAP_REMOTE_ARG,
>       ETH_TAP_MAC_ARG,
>       ETH_TAP_PERSIST_ARG,
> +     ETH_TAP_MAC_FILTER_ARG,
>       NULL
>  };
>  
> @@ -437,6 +441,45 @@ tap_rxq_pool_free(struct rte_mbuf *pool)
>       rte_pktmbuf_free(pool);
>  }
>  
> +static inline bool
> +tap_mac_filter_match(struct rx_queue *rxq, struct rte_mbuf *mbuf)
> +{
> +     struct pmd_internals *pmd = rxq->pmd;
> +     struct rte_eth_dev_data *data;
> +     struct rte_ether_addr *dst;
> +     uint32_t i;
> +
> +     if (!pmd->macfilter)
> +             return true;
> +
> +     data = pmd->dev->data;
> +     if (data->promiscuous)
> +             return true;
> +
> +     dst = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
> +
> +     if (rte_is_broadcast_ether_addr(dst))
> +             return true;
> +
> +     if (rte_is_multicast_ether_addr(dst)) {
> +             if (data->all_multicast)
> +                     return true;
> +             for (i = 0; i < pmd->nb_mc_addrs; i++) {
> +                     if (rte_is_same_ether_addr(dst, &pmd->mc_addrs[i]))
> +                             return true;
> +             }
> +             return false;
> +     }
> +
> +     for (i = 0; i < TAP_MAX_MAC_ADDRS; i++) {
> +             if (rte_is_zero_ether_addr(&data->mac_addrs[i]))
> +                     continue;
> +             if (rte_is_same_ether_addr(dst, &data->mac_addrs[i]))
> +                     return true;
> +     }
> +     return false;
> +}
> +
>  /* Callback to handle the rx burst of packets to the correct interface and
>   * file descriptor(s) in a multi-queue setup.
>   */
> @@ -515,6 +558,13 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>                       data_off = 0;
>               }
>               seg->next = NULL;
> +
> +             if (!tap_mac_filter_match(rxq, mbuf)) {
> +                     rxq->stats.mac_drops++;
> +                     rte_pktmbuf_free(mbuf);
> +                     continue;
> +             }
> +
>               mbuf->packet_type = rte_net_get_ptype(mbuf, NULL,
>                                                     RTE_PTYPE_ALL_MASK);
>               if (rxq->rxmode->offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
> @@ -933,7 +983,7 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>       struct pmd_internals *internals = dev->data->dev_private;
>  
>       dev_info->if_index = internals->if_index;
> -     dev_info->max_mac_addrs = 1;
> +     dev_info->max_mac_addrs = TAP_MAX_MAC_ADDRS;
>       dev_info->max_rx_pktlen = RTE_ETHER_MAX_JUMBO_FRAME_LEN;
>       dev_info->max_rx_queues = RTE_PMD_TAP_MAX_QUEUES;
>       dev_info->max_tx_queues = RTE_PMD_TAP_MAX_QUEUES;
> @@ -1025,6 +1075,43 @@ tap_stats_reset(struct rte_eth_dev *dev)
>       return 0;
>  }
>  
> +static int
> +tap_xstats_get_names(struct rte_eth_dev *dev,
> +                  struct rte_eth_xstat_name *names,
> +                  unsigned int limit __rte_unused)
> +{
> +     unsigned int i;
> +
> +     if (names == NULL)
> +             return dev->data->nb_rx_queues;
> +
> +     for (i = 0; i < dev->data->nb_rx_queues; i++)
> +             snprintf(names[i].name, sizeof(names[i].name),
> +                      "rx_q%u_mac_filter_drops", i);
> +
> +     return dev->data->nb_rx_queues;
> +}
> +
> +static int
> +tap_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
> +            unsigned int n)
> +{
> +     struct rte_eth_dev_data *data = dev->data;
> +     struct rx_queue *rxq;
> +     unsigned int i;
> +
> +     if (n < dev->data->nb_rx_queues)
> +             return dev->data->nb_rx_queues;
> +
> +     for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +             rxq = data->rx_queues[i];
> +             xstats[i].id = i;
> +             xstats[i].value = rxq->stats.mac_drops;
> +     }
> +
> +     return dev->data->nb_rx_queues;
> +}
> +
>  static void
>  tap_queue_close(struct pmd_process_private *process_private, uint16_t qid)
>  {
> @@ -1089,14 +1176,15 @@ tap_dev_close(struct rte_eth_dev *dev)
>       rte_mempool_free(internals->gso_ctx_mp);
>       internals->gso_ctx_mp = NULL;
>  
> +     rte_free(internals->mc_addrs);
> +     internals->mc_addrs = NULL;
> +     internals->nb_mc_addrs = 0;
> +
>       if (internals->ka_fd != -1) {
>               close(internals->ka_fd);
>               internals->ka_fd = -1;
>       }
>  
> -     /* mac_addrs must not be freed alone because part of dev_private */
> -     dev->data->mac_addrs = NULL;
> -
>       internals = dev->data->dev_private;
>       TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u",
>               tuntap_types[internals->type], rte_socket_id());
> @@ -1574,6 +1662,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
>       }
>       tmp = &rxq->pool;
>  
> +     rxq->pmd = internals;
>       rxq->mp = mp;
>       rxq->trigger_seen = 1; /* force initial burst */
>       rxq->in_port = dev->data->port_id;
> @@ -1692,17 +1781,50 @@ tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
>  }
>  
>  static int
> -tap_set_mc_addr_list(struct rte_eth_dev *dev __rte_unused,
> -                  struct rte_ether_addr *mc_addr_set __rte_unused,
> -                  uint32_t nb_mc_addr __rte_unused)
> +tap_set_mc_addr_list(struct rte_eth_dev *dev,
> +                  struct rte_ether_addr *mc_addr_set,
> +                  uint32_t nb_mc_addr)
>  {
> -     /*
> -      * Nothing to do actually: the tap has no filtering whatsoever, every
> -      * packet is received.
> -      */
> +     struct pmd_internals *pmd = dev->data->dev_private;
> +
> +     if (nb_mc_addr == 0) {
> +             rte_free(pmd->mc_addrs);
> +             pmd->mc_addrs = NULL;
> +             pmd->nb_mc_addrs = 0;
> +             return 0;
> +     }
> +
> +     pmd->mc_addrs = rte_realloc(pmd->mc_addrs,
> +                                 nb_mc_addr * sizeof(*pmd->mc_addrs), 0);
> +     if (pmd->mc_addrs == NULL) {
> +             pmd->nb_mc_addrs = 0;
> +             return -ENOMEM;
> +     }
> +
> +     memcpy(pmd->mc_addrs, mc_addr_set,
> +            nb_mc_addr * sizeof(*pmd->mc_addrs));
> +     pmd->nb_mc_addrs = nb_mc_addr;
> +
>       return 0;
>  }
>  
> +static int
> +tap_mac_addr_add(struct rte_eth_dev *dev __rte_unused,
> +              struct rte_ether_addr *mac_addr __rte_unused,
> +              uint32_t index __rte_unused,
> +              uint32_t vmdq __rte_unused)
> +{
> +     /* ethdev layer already stores the address in mac_addrs[] */
> +     return 0;
> +}
> +
> +static void
> +tap_mac_addr_remove(struct rte_eth_dev *dev __rte_unused,
> +                 uint32_t index __rte_unused)
> +{
> +     /* ethdev layer already zeroes the slot in mac_addrs[] */
> +}
> +
>  static void tap_dev_intr_handler(void *cb_arg);
>  static int tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set);
>  
> @@ -2038,10 +2160,15 @@ static const struct eth_dev_ops ops = {
>       .allmulticast_enable    = tap_allmulti_enable,
>       .allmulticast_disable   = tap_allmulti_disable,
>       .mac_addr_set           = tap_mac_set,
> +     .mac_addr_add           = tap_mac_addr_add,
> +     .mac_addr_remove        = tap_mac_addr_remove,
>       .mtu_set                = tap_mtu_set,
>       .set_mc_addr_list       = tap_set_mc_addr_list,
>       .stats_get              = tap_stats_get,
>       .stats_reset            = tap_stats_reset,
> +     .xstats_get             = tap_xstats_get,
> +     .xstats_get_names       = tap_xstats_get_names,
> +     .xstats_reset           = tap_stats_reset,
>       .dev_supported_ptypes_get = tap_dev_supported_ptypes_get,
>       .rss_hash_update        = tap_rss_hash_update,
>  #ifdef HAVE_TCA_FLOWER
> @@ -2052,7 +2179,7 @@ static const struct eth_dev_ops ops = {
>  static int
>  eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
>                  const char *remote_iface, struct rte_ether_addr *mac_addr,
> -                enum rte_tuntap_type type, int persist)
> +                enum rte_tuntap_type type, int persist, int macfilter)
>  {
>       int numa_node = rte_socket_id();
>       struct rte_eth_dev *dev;
> @@ -2102,7 +2229,14 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
>       data->numa_node = numa_node;
>  
>       data->dev_link = pmd_link;
> -     data->mac_addrs = &pmd->eth_addr;
> +     data->mac_addrs = rte_zmalloc_socket(rte_vdev_device_name(vdev),
> +                                          TAP_MAX_MAC_ADDRS *
> +                                          sizeof(struct rte_ether_addr),
> +                                          0, numa_node);
> +     if (data->mac_addrs == NULL) {
> +             TAP_LOG(ERR, "Failed to allocate mac_addrs");
> +             goto error_exit;
> +     }
>       /* Set the number of RX and TX queues */
>       data->nb_rx_queues = 0;
>       data->nb_tx_queues = 0;
> @@ -2120,6 +2254,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
>               process_private->fds[i] = -1;
>  
>  
> +     pmd->macfilter = macfilter;
> +
>       if (pmd->type == ETH_TUNTAP_TYPE_TAP) {
>               if (rte_is_zero_ether_addr(mac_addr))
>                       rte_eth_random_addr((uint8_t *)&pmd->eth_addr);
> @@ -2227,6 +2363,9 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
>       }
>  #endif
>  
> +     /* Copy final MAC to slot 0 (remote path may have overwritten it) */
> +     data->mac_addrs[0] = pmd->eth_addr;
> +
>       rte_eth_dev_probing_finish(dev);
>       return 0;
>  
> @@ -2246,8 +2385,6 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
>       free(dev->process_private);
>  
>  error_exit_nodev_release:
> -     /* mac_addrs must not be freed alone because part of dev_private */
> -     dev->data->mac_addrs = NULL;
>       rte_eth_dev_release_port(dev);
>  
>  error_exit_nodev:
> @@ -2405,7 +2542,7 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
>       TAP_LOG(DEBUG, "Initializing pmd_tun for %s", name);
>  
>       ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0,
> -                              ETH_TUNTAP_TYPE_TUN, 0);
> +                              ETH_TUNTAP_TYPE_TUN, 0, 0);
>  
>  leave:
>       if (ret == -1) {
> @@ -2529,6 +2666,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>       struct rte_eth_dev *eth_dev;
>       int tap_devices_count_increased = 0;
>       int persist = 0;
> +     int macfilter = 0;
>  
>       name = rte_vdev_device_name(dev);
>       params = rte_vdev_device_args(dev);
> @@ -2617,6 +2755,9 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>  
>                       if (rte_kvargs_count(kvlist, ETH_TAP_PERSIST_ARG) == 1)
>                               persist = 1;
> +
> +                     if (rte_kvargs_count(kvlist, ETH_TAP_MAC_FILTER_ARG) == 1)
> +                             macfilter = 1;
>               }
>       }
>  
> @@ -2634,7 +2775,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>       tap_devices_count++;
>       tap_devices_count_increased = 1;
>       ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
> -                              ETH_TUNTAP_TYPE_TAP, persist);
> +                              ETH_TUNTAP_TYPE_TAP, persist, macfilter);
>  
>  leave:
>       if (ret == -1) {
> @@ -2687,5 +2828,6 @@ RTE_PMD_REGISTER_PARAM_STRING(net_tun,
>  RTE_PMD_REGISTER_PARAM_STRING(net_tap,
>                             ETH_TAP_IFACE_ARG "=<string> "
>                             ETH_TAP_MAC_ARG "=" ETH_TAP_MAC_ARG_FMT " "
> -                           ETH_TAP_REMOTE_ARG "=<string>");
> +                           ETH_TAP_REMOTE_ARG "=<string> "
> +                           ETH_TAP_MAC_FILTER_ARG "=<int>");
>  RTE_LOG_REGISTER_DEFAULT(tap_logtype, NOTICE);
> diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
> index b44eaf9a1bdb..dd65b33bf351 100644
> --- a/drivers/net/tap/rte_eth_tap.h
> +++ b/drivers/net/tap/rte_eth_tap.h
> @@ -38,9 +38,13 @@ struct queue_stats {
>       uint64_t packets;
>       uint64_t bytes;
>       uint64_t errors;
> +     uint64_t mac_drops;             /* Packets dropped by MAC filter */
>  };
>  
> +struct pmd_internals;
> +
>  struct rx_queue {
> +     struct pmd_internals *pmd;      /* back-pointer to driver state */
>       struct rte_mempool *mp;         /* Mempool for RX packets */
>       uint32_t trigger_seen;          /* Last seen Rx trigger value */
>       uint16_t in_port;               /* Port ID */
> @@ -69,7 +73,10 @@ struct pmd_internals {
>       char name[IFNAMSIZ];              /* Internal Tap device name */
>       int type;                         /* Type field - TUN|TAP */
>       int persist;                      /* 1 if keep link up, else 0 */
> +     int macfilter;                    /* SW MAC filtering enabled */
>       struct rte_ether_addr eth_addr;   /* Mac address of the device port */
> +     struct rte_ether_addr *mc_addrs;  /* multicast address list */
> +     uint32_t nb_mc_addrs;             /* multicast address count */
>       unsigned int remote_initial_flags;/* Remote netdevice flags on init */
>       int remote_if_index;              /* remote netdevice IF_INDEX */
>       int if_index;                     /* IF_INDEX for the port */

TAP should just always do the MAC filtering instead of making it opt-in.
TAP should behave like a real NIC.
Flags should be uint8_t or bool, not int.

Reply via email to