> Subject: [EXTERNAL] [RFC] app/testpmd: add configurable flow count for txonly
> multi-flow
> 
> Started with Long's patch to change the port and added a parameter.
> This is a suggestion only *DO NOT MERGE*
> Not sure what a good name for the parameter is; this is just a quick hack.
> 
> The txonly multi-flow mode generates 64 unique UDP source ports per lcore by
> cycling the high byte from 0xC0 to 0xFF. On SmartNICs with limited hardware
> flow table caching, this fixed count can exhaust the flow cache and degrade
> receive-side performance.
> 
> Add --txonly-nb-flows=N command line parameter and 'set txonly-nb-flows'
> runtime command to limit the number of unique source ports per lcore to
> between 1 and 64. The default remains 64 to preserve existing behavior.
> 
> The source port encoding is unchanged: the low byte carries the lcore ID
> (avoiding atomics) and the high byte cycles through N values starting at
> 0xC0. Total unique flows = txonly_nb_flows * active_lcores.
> 
> Reported-by: Long Li <[email protected]>
> Signed-off-by: Stephen Hemminger <[email protected]>

Thank you, this worked well on an Azure VM at 200 Gb/s.

Tested-by: Long Li <[email protected]>


> ---
> 
>  app/test-pmd/cmdline.c                      | 49 +++++++++++++++++++++
>  app/test-pmd/parameters.c                   | 13 ++++++
>  app/test-pmd/testpmd.c                      |  3 ++
>  app/test-pmd/testpmd.h                      |  1 +
>  app/test-pmd/txonly.c                       | 24 +++++-----
>  doc/guides/testpmd_app_ug/run_app.rst       |  8 ++++
>  doc/guides/testpmd_app_ug/testpmd_funcs.rst | 16 +++++++
>  7 files changed, 104 insertions(+), 10 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> c33c66f327..debd226762 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -384,6 +384,10 @@ static void cmd_help_long_parsed(void *parsed_result,
>                       "    Set the scheduling on timestamps"
>                       " timings for the TXONLY mode\n\n"
> 
> +                     "set txonly-nb-flows (N)\n"
> +                     "    Set the number of flows per lcore in"
> +                     " txonly multi-flow mode (1-64)\n\n"
> +
>                       "set corelist (x[,y]*)\n"
>                       "    Set the list of forwarding cores.\n\n"
> 
> @@ -4612,6 +4616,50 @@ static cmdline_parse_inst_t cmd_set_txtimes = {
>       },
>  };
> 
> +/* *** SET NUMBER OF FLOWS IN TXONLY MULTI-FLOW MODE *** */
> +
> +struct cmd_set_txonly_nb_flows_result {
> +     cmdline_fixed_string_t cmd_keyword;
> +     cmdline_fixed_string_t name;
> +     uint16_t value;
> +};
> +
> +static void
> +cmd_set_txonly_nb_flows_parsed(void *parsed_result,
> +                            __rte_unused struct cmdline *cl,
> +                            __rte_unused void *data)
> +{
> +     struct cmd_set_txonly_nb_flows_result *res = parsed_result;
> +
> +     if (res->value < 1 || res->value > 64) {
> +             fprintf(stderr, "txonly-nb-flows must be >= 1 and <= 64\n");
> +             return;
> +     }
> +     txonly_nb_flows = res->value;
> +}
> +
> +static cmdline_parse_token_string_t cmd_set_txonly_nb_flows_keyword =
> +     TOKEN_STRING_INITIALIZER(struct cmd_set_txonly_nb_flows_result,
> +                              cmd_keyword, "set");
> +static cmdline_parse_token_string_t cmd_set_txonly_nb_flows_name =
> +     TOKEN_STRING_INITIALIZER(struct cmd_set_txonly_nb_flows_result,
> +                              name, "txonly-nb-flows");
> +static cmdline_parse_token_num_t cmd_set_txonly_nb_flows_value =
> +     TOKEN_NUM_INITIALIZER(struct cmd_set_txonly_nb_flows_result,
> +                           value, RTE_UINT16);
> +
> +static cmdline_parse_inst_t cmd_set_txonly_nb_flows = {
> +     .f = cmd_set_txonly_nb_flows_parsed,
> +     .data = NULL,
> +     .help_str = "set txonly-nb-flows <N>",
> +     .tokens = {
> +             (void *)&cmd_set_txonly_nb_flows_keyword,
> +             (void *)&cmd_set_txonly_nb_flows_name,
> +             (void *)&cmd_set_txonly_nb_flows_value,
> +             NULL,
> +     },
> +};
> +
>  /* *** ADD/REMOVE ALL VLAN IDENTIFIERS TO/FROM A PORT VLAN RX FILTER
> *** */  struct cmd_rx_vlan_filter_all_result {
>       cmdline_fixed_string_t rx_vlan;
> @@ -14102,6 +14150,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
>       &cmd_set_txpkts,
>       &cmd_set_txsplit,
>       &cmd_set_txtimes,
> +     &cmd_set_txonly_nb_flows,
>       &cmd_set_fwd_list,
>       &cmd_set_fwd_mask,
>       &cmd_set_fwd_mode,
> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c index
> f2037925c2..2432d720fb 100644
> --- a/app/test-pmd/parameters.c
> +++ b/app/test-pmd/parameters.c
> @@ -193,6 +193,8 @@ enum {
>       TESTPMD_OPT_MULTI_RX_MEMPOOL_NUM,
>  #define TESTPMD_OPT_TXONLY_MULTI_FLOW "txonly-multi-flow"
>       TESTPMD_OPT_TXONLY_MULTI_FLOW_NUM,
> +#define TESTPMD_OPT_TXONLY_NB_FLOWS "txonly-nb-flows"
> +     TESTPMD_OPT_TXONLY_NB_FLOWS_NUM,
>  #define TESTPMD_OPT_RXQ_SHARE "rxq-share"
>       TESTPMD_OPT_RXQ_SHARE_NUM,
>  #define TESTPMD_OPT_ETH_LINK_SPEED "eth-link-speed"
> @@ -348,6 +350,7 @@ static const struct option long_options[] = {
>       REQUIRED_ARG(TESTPMD_OPT_TXPKTS),
>       NO_ARG(TESTPMD_OPT_MULTI_RX_MEMPOOL),
>       NO_ARG(TESTPMD_OPT_TXONLY_MULTI_FLOW),
> +     REQUIRED_ARG(TESTPMD_OPT_TXONLY_NB_FLOWS),
>       OPTIONAL_ARG(TESTPMD_OPT_RXQ_SHARE),
>       REQUIRED_ARG(TESTPMD_OPT_ETH_LINK_SPEED),
>       NO_ARG(TESTPMD_OPT_DISABLE_LINK_CHECK),
> @@ -499,6 +502,8 @@ usage(char* progname)
>               " or total packet length.\n");
>       printf("  --multi-rx-mempool: enable multi-rx-mempool support\n");
>       printf("  --txonly-multi-flow: generate multiple flows in txonly 
> mode\n");
> +     printf("  --txonly-nb-flows=N: number of flows per lcore in txonly"
> +            " multi-flow mode (1-64, default 64)\n");
>       printf("  --tx-ip=src,dst: IP addresses in Tx-only mode\n");
>       printf("  --tx-udp=src[,dst]: UDP ports in Tx-only mode\n");
>       printf("  --eth-link-speed: force link speed.\n"); @@ -1566,6 +1571,14
> @@ launch_args_parse(int argc, char** argv)
>               case TESTPMD_OPT_TXONLY_MULTI_FLOW_NUM:
>                       txonly_multi_flow = 1;
>                       break;
> +             case TESTPMD_OPT_TXONLY_NB_FLOWS_NUM:
> +                     n = atoi(optarg);
> +                     if (n >= 1 && n <= 64)
> +                             txonly_nb_flows = (uint16_t)n;
> +                     else
> +                             rte_exit(EXIT_FAILURE,
> +                                      "txonly-nb-flows must be >= 1 and <=
> 64\n");
> +                     break;
>               case TESTPMD_OPT_RXQ_SHARE_NUM:
>                       if (optarg == NULL) {
>                               rxq_share = UINT32_MAX;
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> fbacee89ea..6661bf16cd 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -287,6 +287,9 @@ enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
> uint8_t txonly_multi_flow;  /**< Whether multiple flows are generated in
> TXONLY mode. */
> 
> +uint16_t txonly_nb_flows = 64;
> +/**< Number of unique flows per lcore in TXONLY multi-flow mode. */
> +
>  uint32_t tx_pkt_times_inter;
>  /**< Timings for send scheduling in TXONLY mode, time between bursts. */
> 
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index
> f319471c73..13c3915848 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -673,6 +673,7 @@ enum tx_pkt_split {
>  extern enum tx_pkt_split tx_pkt_split;
> 
>  extern uint8_t txonly_multi_flow;
> +extern uint16_t txonly_nb_flows; /**< Number of flows in txonly
> +multi-flow */
> 
>  extern uint32_t rxq_share;
> 
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index
> bdcf6ea660..7ba9abf656 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -223,18 +223,22 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct
> rte_mempool *mbp,
>                               sizeof(struct rte_ether_hdr) +
>                               sizeof(struct rte_ipv4_hdr));
>               /*
> -              * Generate multiple flows by varying UDP source port.
> -              * This enables packets are well distributed by RSS in
> -              * receiver side if any and txonly mode can be a decent
> -              * packet generator for developer's quick performance
> -              * regression test.
> +              * Generate a configurable number of flows per lcore by
> +              * varying the UDP source port. The low byte is the lcore
> +              * ID, ensuring each lcore produces unique ports without
> +              * atomic operations. The high byte cycles through
> +              * txonly_nb_flows values starting at 0xC0, keeping ports
> +              * in the ephemeral range 49152-65535 (RFC 6335).
>                *
> -              * Only ports in the range 49152 (0xC000) and 65535 (0xFFFF)
> -              * will be used, with the least significant byte representing
> -              * the lcore ID. As such, the most significant byte will cycle
> -              * through 0xC0 and 0xFF.
> +              * Total unique flows = txonly_nb_flows * active_lcores.
> +              *
> +              * Note: lcore IDs above 255 will alias in the low byte,
> +              * causing flow overlap between those lcores. This is
> +              * acceptable as the total flow count at that scale
> +              * already exceeds typical hardware flow table sizes.
>                */
> -             src_port = ((src_var++ | 0xC0) << 8) + rte_lcore_id();
> +             src_port = (((src_var++ % txonly_nb_flows) + 0xC0) << 8)
> +                        + rte_lcore_id();
>               udp_hdr->src_port = rte_cpu_to_be_16(src_port);
>               RTE_PER_LCORE(_src_port_var) = src_var;
>       }
> diff --git a/doc/guides/testpmd_app_ug/run_app.rst
> b/doc/guides/testpmd_app_ug/run_app.rst
> index 97d6c75716..a4a57ea383 100644
> --- a/doc/guides/testpmd_app_ug/run_app.rst
> +++ b/doc/guides/testpmd_app_ug/run_app.rst
> @@ -386,6 +386,14 @@ The command line options are:
> 
>      Generate multiple flows in txonly mode.
> 
> +*   ``--txonly-nb-flows=N``
> +
> +    Set the number of unique flows per lcore when txonly multi-flow mode
> +    is enabled. Valid range is 1 to 64. Default is 64, which preserves
> +    the original behavior. Reducing this value limits the number of unique
> +    UDP source ports generated, which can prevent exhaustion of hardware
> +    flow table entries on SmartNICs.
> +
>  *   ``--rxq-share=[X]``
> 
>      Create queues in shared Rx queue mode if device supports.
> diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> index 62bb167d56..ff1c8a444d 100644
> --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> @@ -924,6 +924,22 @@ Where:
> 
>  * ``rand`` same as 'on', but number of segments per each packet is a random
> value between 1 and total number of segments.
> 
> +set txonly-nb-flows
> +~~~~~~~~~~~~~~~~~~~
> +
> +Set the number of unique flows per lcore in txonly multi-flow mode::
> +
> +   testpmd> set txonly-nb-flows <N>
> +
> +Where ``N`` is the number of unique UDP source port values each lcore
> +will cycle through, in the range 1 to 64. Default is 64.
> +
> +Each lcore generates unique flows by combining the flow index with its
> +lcore ID, so the total number of unique flows across the system is
> +``txonly-nb-flows * active_lcores``. Reducing this value can prevent
> +exhaustion of hardware flow table entries on SmartNICs that have
> +limited flow caching capacity.
> +
>  set corelist
>  ~~~~~~~~~~~~
> 
> --
> 2.51.0

Reply via email to