git.droids-corp.org - dpdk.git/commitdiff
app/testpmd: support sending cloned packets in flowgen
author: Igor Russkikh <irusskikh@marvell.com>
Thu, 21 Jan 2021 18:05:30 +0000 (19:05 +0100)
committer: Ferruh Yigit <ferruh.yigit@intel.com>
Fri, 29 Jan 2021 17:16:12 +0000 (18:16 +0100)
When testing for peak performance numbers, CPU performance often
limits the maximum values a device can reach (both in pps and in Gbps).

Here, instead of recreating each packet separately, we use a clone counter
to resend the same mbuf to the line multiple times.

PMDs handle that transparently due to reference counting inside of mbuf.

This helps in reaching max PPS on small packet sizes.
Some data from our 2-port x 50G device, using 2*6 Tx queues, 64-byte packets,
PowerEdge R7525, AMD EPYC 7452:

./build/app/dpdk-testpmd -l 32-63  -- --forward-mode=flowgen \
  --rxq=6 --txq=6  --disable-crc-strip --burst=512 \
  --flowgen-clones=0 --txd=4096 --stats-period=1 --txpkts=64

Gives ~46MPPS TX output:

  Tx-pps:     22926849          Tx-bps:  11738590176
  Tx-pps:     23642629          Tx-bps:  12105024112

Setting flowgen-clones to 512 pushes Tx almost to our device's
physical limit (68 MPPS) using the same 2*6 queues (cores):

  Tx-pps:     34357556          Tx-bps:  17591073696
  Tx-pps:     34353211          Tx-bps:  17588802640

Doing similar measurements per core, I see one core can do
6.9MPPS (without clones) vs 11MPPS (with clones)

Verified on Marvell qede and atlantic PMDs.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
app/test-pmd/flowgen.c
app/test-pmd/parameters.c
app/test-pmd/testpmd.c
app/test-pmd/testpmd.h
doc/guides/testpmd_app_ug/run_app.rst

index cabfc688ff3eeaba6f5e029264b54ccc6dd59d76..3bf6e1ce97a7d4232b79254733903db1a69b0f6c 100644 (file)
@@ -88,7 +88,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
        unsigned pkt_size = tx_pkt_length - 4;  /* Adjust FCS */
        struct rte_mbuf  *pkts_burst[MAX_PKT_BURST];
        struct rte_mempool *mbp;
-       struct rte_mbuf  *pkt;
+       struct rte_mbuf  *pkt = NULL;
        struct rte_ether_hdr *eth_hdr;
        struct rte_ipv4_hdr *ip_hdr;
        struct rte_udp_hdr *udp_hdr;
@@ -97,6 +97,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
        uint16_t nb_rx;
        uint16_t nb_tx;
        uint16_t nb_pkt;
+       uint16_t nb_clones = nb_pkt_flowgen_clones;
        uint16_t i;
        uint32_t retry;
        uint64_t tx_offloads;
@@ -126,53 +127,63 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
                ol_flags |= PKT_TX_MACSEC;
 
        for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
-               pkt = rte_mbuf_raw_alloc(mbp);
-               if (!pkt)
-                       break;
-
-               pkt->data_len = pkt_size;
-               pkt->next = NULL;
-
-               /* Initialize Ethernet header. */
-               eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
-               rte_ether_addr_copy(&cfg_ether_dst, &eth_hdr->d_addr);
-               rte_ether_addr_copy(&cfg_ether_src, &eth_hdr->s_addr);
-               eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
-
-               /* Initialize IP header. */
-               ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
-               memset(ip_hdr, 0, sizeof(*ip_hdr));
-               ip_hdr->version_ihl     = RTE_IPV4_VHL_DEF;
-               ip_hdr->type_of_service = 0;
-               ip_hdr->fragment_offset = 0;
-               ip_hdr->time_to_live    = IP_DEFTTL;
-               ip_hdr->next_proto_id   = IPPROTO_UDP;
-               ip_hdr->packet_id       = 0;
-               ip_hdr->src_addr        = rte_cpu_to_be_32(cfg_ip_src);
-               ip_hdr->dst_addr        = rte_cpu_to_be_32(cfg_ip_dst +
-                                                          next_flow);
-               ip_hdr->total_length    = RTE_CPU_TO_BE_16(pkt_size -
-                                                          sizeof(*eth_hdr));
-               ip_hdr->hdr_checksum    = ip_sum((const alias_int16_t *)ip_hdr,
-                                                sizeof(*ip_hdr));
-
-               /* Initialize UDP header. */
-               udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
-               udp_hdr->src_port       = rte_cpu_to_be_16(cfg_udp_src);
-               udp_hdr->dst_port       = rte_cpu_to_be_16(cfg_udp_dst);
-               udp_hdr->dgram_cksum    = 0; /* No UDP checksum. */
-               udp_hdr->dgram_len      = RTE_CPU_TO_BE_16(pkt_size -
-                                                          sizeof(*eth_hdr) -
-                                                          sizeof(*ip_hdr));
-               pkt->nb_segs            = 1;
-               pkt->pkt_len            = pkt_size;
-               pkt->ol_flags           &= EXT_ATTACHED_MBUF;
-               pkt->ol_flags           |= ol_flags;
-               pkt->vlan_tci           = vlan_tci;
-               pkt->vlan_tci_outer     = vlan_tci_outer;
-               pkt->l2_len             = sizeof(struct rte_ether_hdr);
-               pkt->l3_len             = sizeof(struct rte_ipv4_hdr);
-               pkts_burst[nb_pkt]      = pkt;
+               if (!nb_pkt || !nb_clones) {
+                       nb_clones = nb_pkt_flowgen_clones;
+                       /* Logic limitation */
+                       if (nb_clones > nb_pkt_per_burst)
+                               nb_clones = nb_pkt_per_burst;
+
+                       pkt = rte_mbuf_raw_alloc(mbp);
+                       if (!pkt)
+                               break;
+
+                       pkt->data_len = pkt_size;
+                       pkt->next = NULL;
+
+                       /* Initialize Ethernet header. */
+                       eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+                       rte_ether_addr_copy(&cfg_ether_dst, &eth_hdr->d_addr);
+                       rte_ether_addr_copy(&cfg_ether_src, &eth_hdr->s_addr);
+                       eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+
+                       /* Initialize IP header. */
+                       ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
+                       memset(ip_hdr, 0, sizeof(*ip_hdr));
+                       ip_hdr->version_ihl     = RTE_IPV4_VHL_DEF;
+                       ip_hdr->type_of_service = 0;
+                       ip_hdr->fragment_offset = 0;
+                       ip_hdr->time_to_live    = IP_DEFTTL;
+                       ip_hdr->next_proto_id   = IPPROTO_UDP;
+                       ip_hdr->packet_id       = 0;
+                       ip_hdr->src_addr        = rte_cpu_to_be_32(cfg_ip_src);
+                       ip_hdr->dst_addr        = rte_cpu_to_be_32(cfg_ip_dst +
+                                                                  next_flow);
+                       ip_hdr->total_length    = RTE_CPU_TO_BE_16(pkt_size -
+                                                                  sizeof(*eth_hdr));
+                       ip_hdr->hdr_checksum    = ip_sum((const alias_int16_t *)ip_hdr,
+                                                        sizeof(*ip_hdr));
+
+                       /* Initialize UDP header. */
+                       udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
+                       udp_hdr->src_port       = rte_cpu_to_be_16(cfg_udp_src);
+                       udp_hdr->dst_port       = rte_cpu_to_be_16(cfg_udp_dst);
+                       udp_hdr->dgram_cksum    = 0; /* No UDP checksum. */
+                       udp_hdr->dgram_len      = RTE_CPU_TO_BE_16(pkt_size -
+                                                                  sizeof(*eth_hdr) -
+                                                                  sizeof(*ip_hdr));
+                       pkt->nb_segs            = 1;
+                       pkt->pkt_len            = pkt_size;
+                       pkt->ol_flags           &= EXT_ATTACHED_MBUF;
+                       pkt->ol_flags           |= ol_flags;
+                       pkt->vlan_tci           = vlan_tci;
+                       pkt->vlan_tci_outer     = vlan_tci_outer;
+                       pkt->l2_len             = sizeof(struct rte_ether_hdr);
+                       pkt->l3_len             = sizeof(struct rte_ipv4_hdr);
+               } else {
+                       nb_clones--;
+                       rte_mbuf_refcnt_update(pkt, 1);
+               }
+               pkts_burst[nb_pkt] = pkt;
 
                next_flow = (next_flow + 1) % cfg_n_flows;
        }
index df5eb10d841fbd28b89fea7672b332f8d859f5eb..c8acd5d1b7464257940dc9e3af7bea02bf0ee0e1 100644 (file)
@@ -163,6 +163,7 @@ usage(char* progname)
        printf("  --hairpinq=N: set the number of hairpin queues per port to "
               "N.\n");
        printf("  --burst=N: set the number of packets per burst to N.\n");
+       printf("  --flowgen-clones=N: set the number of single packet clones to send in flowgen mode. Should be less than burst value.\n");
        printf("  --mbcache=N: set the cache of mbuf memory pool to N.\n");
        printf("  --rxpt=N: set prefetch threshold register of RX rings to N.\n");
        printf("  --rxht=N: set the host threshold register of RX rings to N.\n");
@@ -561,6 +562,7 @@ launch_args_parse(int argc, char** argv)
                { "hairpinq",                   1, 0, 0 },
                { "hairpin-mode",               1, 0, 0 },
                { "burst",                      1, 0, 0 },
+               { "flowgen-clones",             1, 0, 0 },
                { "mbcache",                    1, 0, 0 },
                { "txpt",                       1, 0, 0 },
                { "txht",                       1, 0, 0 },
@@ -1086,6 +1088,14 @@ launch_args_parse(int argc, char** argv)
                                else
                                        nb_pkt_per_burst = (uint16_t) n;
                        }
+                       if (!strcmp(lgopts[opt_idx].name, "flowgen-clones")) {
+                               n = atoi(optarg);
+                               if (n >= 0)
+                                       nb_pkt_flowgen_clones = (uint16_t) n;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                                "clones must be >= 0 and <= current burst\n");
+                       }
                        if (!strcmp(lgopts[opt_idx].name, "mbcache")) {
                                n = atoi(optarg);
                                if ((n >= 0) &&
index 555852ae5e42c9170dcaa129b48104cbd7051b74..caa711d6f3d041282362c99be18669e73391b903 100644 (file)
@@ -240,6 +240,7 @@ uint32_t tx_pkt_times_intra;
 /**< Timings for send scheduling in TXONLY mode, time between packets. */
 
 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
+uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
 
 /* current configuration is in DCB or not,0 means it is not in DCB mode */
index 2f8f5a92e46a0888713f609bf5bc063beae5a9a5..4aca5228595732f2498b754c990d84016882e3ca 100644 (file)
@@ -476,6 +476,7 @@ extern enum tx_pkt_split tx_pkt_split;
 extern uint8_t txonly_multi_flow;
 
 extern uint16_t nb_pkt_per_burst;
+extern uint16_t nb_pkt_flowgen_clones;
 extern uint16_t mb_mempool_cache;
 extern int8_t rx_pthresh;
 extern int8_t rx_hthresh;
index ca67105b704f5b3a19dfc18bd73b0966839a6a04..6745072329cbbedea27da8383f9c5e2ac02c2329 100644 (file)
@@ -299,6 +299,13 @@ The command line options are:
     If set to 0, driver default is used if defined. Else, if driver
     default is not defined, default of 32 is used.
 
+*   ``--flowgen-clones=N``
+
+    Set the number of clones of each packet to be sent in `flowgen` mode.
+    Sending clones reduces the host CPU load of creating packets and may help
+    in testing extreme speeds or maxing out Tx packet performance.
+    N should be non-zero, but less than the 'burst' parameter.
+
 *   ``--mbcache=N``
 
     Set the cache of mbuf memory pools to N, where 0 <= N <= 512.