/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#ifndef RTE_EXEC_ENV_WINDOWS
#include <sys/mman.h>
#endif
#include <sys/types.h>
#include <errno.h>
#include <stdbool.h>

#include <sys/queue.h>
#include <sys/stat.h>

#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_common.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_alarm.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_dev.h>
#include <rte_string_fns.h>
#ifdef RTE_NET_IXGBE
#include <rte_pmd_ixgbe.h>
#endif
#ifdef RTE_LIB_PDUMP
#include <rte_pdump.h>
#endif
#include <rte_flow.h>
#include <rte_metrics.h>
#ifdef RTE_LIB_BITRATESTATS
#include <rte_bitrate.h>
#endif
#ifdef RTE_LIB_LATENCYSTATS
#include <rte_latencystats.h>
#endif
#ifdef RTE_EXEC_ENV_WINDOWS
#include <process.h>
#endif

#include "testpmd.h"

#ifndef MAP_HUGETLB
/* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
#define HUGE_FLAG (0x40000)
#else
#define HUGE_FLAG MAP_HUGETLB
#endif

#ifndef MAP_HUGE_SHIFT
/* older kernels (or FreeBSD) will not have this define */
#define HUGE_SHIFT (26)
#else
#define HUGE_SHIFT MAP_HUGE_SHIFT
#endif

#define EXTMEM_HEAP_NAME "extmem"
#define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M

uint16_t verbose_level = 0; /**< Silent by default. */
int testpmd_logtype; /**< Log type for testpmd logs */

/* use main core for command line? */
uint8_t interactive = 0;
uint8_t auto_start = 0;
uint8_t tx_first;
char cmdline_filename[PATH_MAX] = {0};

/*
 * NUMA support configuration.
 * When set, the NUMA support attempts to dispatch the allocation of the
 * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
 * probed ports among the CPU sockets 0 and 1.
 * Otherwise, all memory is allocated from CPU socket 0.
 */
uint8_t numa_support = 1; /**< numa enabled by default */

/*
 * In UMA mode, all memory is allocated from socket 0 if --socket-num is
 * not configured.
 */
uint8_t socket_num = UMA_NO_CONFIG;

/*
 * Select mempool allocation type:
 * - native: use regular DPDK memory
 * - anon: use regular DPDK memory to create mempool, but populate using
 *         anonymous memory (may not be IOVA-contiguous)
 * - xmem: use externally allocated hugepage memory
 */
uint8_t mp_alloc_type = MP_ALLOC_NATIVE;

/*
 * Store the specified sockets on which the memory pool used by each port
 * is allocated.
 */
uint8_t port_numa[RTE_MAX_ETHPORTS];

/*
 * Store the specified sockets on which the RX ring used by each port
 * is allocated.
 */
uint8_t rxring_numa[RTE_MAX_ETHPORTS];

/*
 * Store the specified sockets on which the TX ring used by each port
 * is allocated.
 */
uint8_t txring_numa[RTE_MAX_ETHPORTS];

/*
 * Record the Ethernet address of peer target ports to which packets are
 * forwarded.
 * Must be instantiated with the Ethernet addresses of peer traffic generator
 * ports.
 */
struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
portid_t nb_peer_eth_addrs = 0;

/*
 * Probed Target Environment.
 */
struct rte_port *ports;        /**< For all probed ethernet ports. */
portid_t nb_ports;             /**< Number of probed ethernet ports. */
struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
lcoreid_t nb_lcores;           /**< Number of probed logical cores. */

portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */

/*
 * Test Forwarding Configuration.
 *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
 *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
 */
lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
portid_t  nb_cfg_ports;  /**< Number of configured ports. */
portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */

unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */

struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */

/*
 * Forwarding engines.
 */
struct fwd_engine *fwd_engines[] = {
        &io_fwd_engine,
        &mac_fwd_engine,
        &mac_swap_engine,
        &flow_gen_engine,
        &rx_only_engine,
        &tx_only_engine,
        &csum_fwd_engine,
        &icmp_echo_engine,
        &noisy_vnf_engine,
        &five_tuple_swap_fwd_engine,
#ifdef RTE_LIBRTE_IEEE1588
        &ieee1588_fwd_engine,
#endif
        NULL,
};

struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
uint16_t mempool_flags;

struct fwd_config cur_fwd_config;
struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
uint32_t retry_enabled;
uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
uint32_t burst_tx_retry_num = BURST_TX_RETRIES;

uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
        DEFAULT_MBUF_DATA_SIZE
}; /**< Mbuf data space size. */
uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
                                      * specified on command-line. */
uint16_t stats_period; /**< Period to show statistics (disabled by default) */

/** Extended statistics to show. */
struct rte_eth_xstat_name *xstats_display;

unsigned int xstats_display_num; /**< Number of extended statistics to show */

/*
 * When running in a container, the process started with the 'stats-period'
 * option cannot be terminated from the terminal. Set a flag to exit the
 * stats period loop after SIGINT/SIGTERM is received.
 */
uint8_t f_quit;

/*
 * Configuration of packet segments used to scatter received packets
 * if any of the split features is configured.
 */
uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */

/*
 * Configuration of packet segments used by the "txonly" processing engine.
 */
uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
        TXONLY_DEF_PACKET_LEN,
};
uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */

enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
/**< Split policy for packets to TX. */

uint8_t txonly_multi_flow;
/**< Whether multiple flows are generated in TXONLY mode. */

uint32_t tx_pkt_times_inter;
/**< Timings for send scheduling in TXONLY mode, time between bursts. */

uint32_t tx_pkt_times_intra;
/**< Timings for send scheduling in TXONLY mode, time between packets. */

uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */

/* Whether the current configuration is in DCB mode; 0 means it is not */
uint8_t dcb_config = 0;

/*
 * Configurable number of RX/TX queues.
 */
queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
queueid_t nb_txq = 1; /**< Number of TX queues per port. */

/*
 * Configurable number of RX/TX ring descriptors.
 * Defaults are supplied by drivers via ethdev.
 */
#define RTE_TEST_RX_DESC_DEFAULT 0
#define RTE_TEST_TX_DESC_DEFAULT 0
uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */

#define RTE_PMD_PARAM_UNSET -1
/*
 * Configurable values of RX and TX ring threshold registers.
 */

int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;

int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of RX free threshold.
 */
int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of RX drop enable.
 */
int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of TX free threshold.
 */
int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of TX RS bit threshold.
 */
int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of buffered packets before sending.
 */
uint16_t noisy_tx_sw_bufsz;

/*
 * Configurable value of packet buffer timeout.
 */
uint16_t noisy_tx_sw_buf_flush_time;

/*
 * Configurable value for size of VNF internal memory area
 * used for simulating noisy neighbour behaviour
 */
uint64_t noisy_lkup_mem_sz;

/*
 * Configurable value of number of random writes done in
 * VNF simulation memory area.
 */
uint64_t noisy_lkup_num_writes;

/*
 * Configurable value of number of random reads done in
 * VNF simulation memory area.
 */
uint64_t noisy_lkup_num_reads;

/*
 * Configurable value of number of random reads/writes done in
 * VNF simulation memory area.
 */
uint64_t noisy_lkup_num_reads_writes;

/*
 * Receive Side Scaling (RSS) configuration.
 */
uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */

/*
 * Port topology configuration
 */
uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */

/*
 * Avoid flushing all the RX streams before starting forwarding.
 */
uint8_t no_flush_rx = 0; /* flush by default */

/*
 * Flow API isolated mode.
 */
uint8_t flow_isolate_all;

/*
 * Avoid checking link status when starting/stopping a port.
 */
uint8_t no_link_check = 0; /* check by default */

/*
 * Don't automatically start all ports in interactive mode.
 */
uint8_t no_device_start = 0;

/*
 * Enable link status change notification.
 */
uint8_t lsc_interrupt = 1; /* enabled by default */

/*
 * Enable device removal notification.
 */
uint8_t rmv_interrupt = 1; /* enabled by default */

uint8_t hot_plug = 0; /**< hotplug disabled by default. */

/* After attach, port setup is called on event or by iterator */
bool setup_on_probe_event = true;

/* Clear ptypes on port initialization. */
uint8_t clear_ptypes = true;

/* Hairpin ports configuration mode. */
uint16_t hairpin_mode;

/* Pretty printing of ethdev events */
static const char * const eth_event_desc[] = {
        [RTE_ETH_EVENT_UNKNOWN] = "unknown",
        [RTE_ETH_EVENT_INTR_LSC] = "link state change",
        [RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
        [RTE_ETH_EVENT_INTR_RESET] = "reset",
        [RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
        [RTE_ETH_EVENT_IPSEC] = "IPsec",
        [RTE_ETH_EVENT_MACSEC] = "MACsec",
        [RTE_ETH_EVENT_INTR_RMV] = "device removal",
        [RTE_ETH_EVENT_NEW] = "device probed",
        [RTE_ETH_EVENT_DESTROY] = "device released",
        [RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
        [RTE_ETH_EVENT_MAX] = NULL,
};

/*
 * Display or mask ether events.
 * Default to all events except VF_MBOX.
 */
uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
                            (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
                            (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
                            (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
                            (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
                            (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
                            (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
                            (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
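
/*
 * Illustrative note (not part of the upstream source): each event type
 * contributes one bit to event_print_mask, so printing of the VF_MBOX
 * events excluded above could be re-enabled at startup with one extra OR:
 *
 *   event_print_mask |= UINT32_C(1) << RTE_ETH_EVENT_VF_MBOX;
 */
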
/*
 * Decide if all memory is locked for performance.
 */
int do_mlockall = 0;

/*
 * NIC bypass mode configuration options.
 */

#if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
/* The NIC bypass watchdog timeout. */
uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
#endif

#ifdef RTE_LIB_LATENCYSTATS

/*
 * Set when latency stats are enabled on the command line.
 */
uint8_t latencystats_enabled;

/*
 * Lcore ID to serve latency statistics.
 */
lcoreid_t latencystats_lcore_id = -1;

#endif

/*
 * Ethernet device configuration.
 */
struct rte_eth_rxmode rx_mode = {
        /* Default maximum frame length.
         * Zero is converted to "RTE_ETHER_MTU + PMD Ethernet overhead"
         * in init_config().
         */
        .max_rx_pkt_len = 0,
};

struct rte_eth_txmode tx_mode = {
        .offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
};

struct rte_fdir_conf fdir_conf = {
        .mode = RTE_FDIR_MODE_NONE,
        .pballoc = RTE_FDIR_PBALLOC_64K,
        .status = RTE_FDIR_REPORT_STATUS,
        .mask = {
                .vlan_tci_mask = 0xFFEF,
                .ipv4_mask     = {
                        .src_ip = 0xFFFFFFFF,
                        .dst_ip = 0xFFFFFFFF,
                },
                .ipv6_mask     = {
                        .src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
                        .dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
                },
                .src_port_mask = 0xFFFF,
                .dst_port_mask = 0xFFFF,
                .mac_addr_byte_mask = 0xFF,
                .tunnel_type_mask = 1,
                .tunnel_id_mask = 0xFFFFFFFF,
        },
        .drop_queue = 127,
};

volatile int test_done = 1; /* stop packet forwarding when set to 1. */

/*
 * Display zero values by default for xstats.
 */
uint8_t xstats_hide_zero;

/*
 * Measurement of CPU cycles disabled by default.
 */
uint8_t record_core_cycles;

/*
 * Display of RX and TX bursts disabled by default.
 */
uint8_t record_burst_stats;

unsigned int num_sockets = 0;
unsigned int socket_ids[RTE_MAX_NUMA_NODES];

#ifdef RTE_LIB_BITRATESTATS
/* Bitrate statistics */
struct rte_stats_bitrates *bitrate_data;
lcoreid_t bitrate_lcore_id;
uint8_t bitrate_enabled;
#endif

struct gro_status gro_ports[RTE_MAX_ETHPORTS];
uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;

/*
 * Hexadecimal bitmask of RX multi-queue modes that can be enabled.
 */
enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;

/*
 * Used to set forced link speed.
 */
uint32_t eth_link_speed;

/*
 * ID of the current process in multi-process, used to
 * configure the queues to be polled.
 */
int proc_id;

/*
 * Number of processes in multi-process, used to
 * configure the queues to be polled.
 */
unsigned int num_procs = 1;

static void
eth_rx_metadata_negotiate_mp(uint16_t port_id)
{
        uint64_t rx_meta_features = 0;
        int ret;

        if (!is_proc_primary())
                return;

        rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
        rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
        rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;

        ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
        if (ret == 0) {
                if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
                        TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
                                    port_id);
                }

                if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
                        TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
                                    port_id);
                }

                if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
                        TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
                                    port_id);
                }
        } else if (ret != -ENOTSUP) {
                rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
                         port_id, rte_strerror(-ret));
        }
}

static void
flow_pick_transfer_proxy_mp(uint16_t port_id)
{
        struct rte_port *port = &ports[port_id];
        int ret;

        port->flow_transfer_proxy = port_id;

        if (!is_proc_primary())
                return;

        ret = rte_flow_pick_transfer_proxy(port_id, &port->flow_transfer_proxy,
                                           NULL);
        if (ret != 0) {
                fprintf(stderr, "Error picking flow transfer proxy for port %u: %s - ignore\n",
                        port_id, rte_strerror(-ret));
        }
}

static int
eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
                      const struct rte_eth_conf *dev_conf)
{
        if (is_proc_primary())
                return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
                                        dev_conf);
        return 0;
}

static int
eth_dev_start_mp(uint16_t port_id)
{
        if (is_proc_primary())
                return rte_eth_dev_start(port_id);

        return 0;
}

static int
eth_dev_stop_mp(uint16_t port_id)
{
        if (is_proc_primary())
                return rte_eth_dev_stop(port_id);

        return 0;
}

static void
mempool_free_mp(struct rte_mempool *mp)
{
        if (is_proc_primary())
                rte_mempool_free(mp);
}

static int
eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
{
        if (is_proc_primary())
                return rte_eth_dev_set_mtu(port_id, mtu);

        return 0;
}
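
/*
 * Illustrative note (not part of the upstream source): the *_mp wrappers
 * above guard device-state changes behind is_proc_primary() because, in
 * multi-process mode, ethdev configuration is owned by the primary process;
 * secondary processes attach to the shared configuration and must not
 * reconfigure, start, or stop ports themselves, so the wrappers simply
 * report success there.
 */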

/* Forward function declarations */
static void setup_attached_port(portid_t pi);
static void check_all_ports_link_status(uint32_t port_mask);
static int eth_event_callback(portid_t port_id,
                              enum rte_eth_event_type type,
                              void *param, void *ret_param);
static void dev_event_callback(const char *device_name,
                                enum rte_dev_event_type type,
                                void *param);
static void fill_xstats_display_info(void);

/*
 * Check if all the ports are started.
 * If yes, return positive value. If not, return zero.
 */
static int all_ports_started(void);

struct gso_status gso_ports[RTE_MAX_ETHPORTS];
uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;

/* Holds the registered mbuf dynamic flags names. */
char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];

/*
 * Helper function to check whether a socket ID has already been discovered.
 * Return a positive value if the socket ID is new, zero if it is already
 * known.
 */
int
new_socket_id(unsigned int socket_id)
{
        unsigned int i;

        for (i = 0; i < num_sockets; i++) {
                if (socket_ids[i] == socket_id)
                        return 0;
        }
        return 1;
}

/*
 * Setup default configuration.
 */
static void
set_default_fwd_lcores_config(void)
{
        unsigned int i;
        unsigned int nb_lc;
        unsigned int sock_num;

        nb_lc = 0;
        for (i = 0; i < RTE_MAX_LCORE; i++) {
                if (!rte_lcore_is_enabled(i))
                        continue;
                sock_num = rte_lcore_to_socket_id(i);
                if (new_socket_id(sock_num)) {
                        if (num_sockets >= RTE_MAX_NUMA_NODES) {
                                rte_exit(EXIT_FAILURE,
                                         "Total sockets greater than %u\n",
                                         RTE_MAX_NUMA_NODES);
                        }
                        socket_ids[num_sockets++] = sock_num;
                }
                if (i == rte_get_main_lcore())
                        continue;
                fwd_lcores_cpuids[nb_lc++] = i;
        }
        nb_lcores = (lcoreid_t) nb_lc;
        nb_cfg_lcores = nb_lcores;
        nb_fwd_lcores = 1;
}

static void
set_def_peer_eth_addrs(void)
{
        portid_t i;

        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
                peer_eth_addrs[i].addr_bytes[5] = i;
        }
}

static void
set_default_fwd_ports_config(void)
{
        portid_t pt_id;
        int i = 0;

        RTE_ETH_FOREACH_DEV(pt_id) {
                fwd_ports_ids[i++] = pt_id;

                /* Update sockets info according to the attached device */
                int socket_id = rte_eth_dev_socket_id(pt_id);
                if (socket_id >= 0 && new_socket_id(socket_id)) {
                        if (num_sockets >= RTE_MAX_NUMA_NODES) {
                                rte_exit(EXIT_FAILURE,
                                         "Total sockets greater than %u\n",
                                         RTE_MAX_NUMA_NODES);
                        }
                        socket_ids[num_sockets++] = socket_id;
                }
        }

        nb_cfg_ports = nb_ports;
        nb_fwd_ports = nb_ports;
}

void
set_def_fwd_config(void)
{
        set_default_fwd_lcores_config();
        set_def_peer_eth_addrs();
        set_default_fwd_ports_config();
}

#ifndef RTE_EXEC_ENV_WINDOWS
/* extremely pessimistic estimation of memory required to create a mempool */
static int
calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
{
        unsigned int n_pages, mbuf_per_pg, leftover;
        uint64_t total_mem, mbuf_mem, obj_sz;

        /* there is no good way to predict how much space the mempool will
         * occupy because it will allocate chunks on the fly, and some of those
         * will come from default DPDK memory while some will come from our
         * external memory, so just assume 128MB will be enough for everyone.
         */
        uint64_t hdr_mem = 128 << 20;

        /* account for possible non-contiguousness */
        obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
        if (obj_sz > pgsz) {
                TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
                return -1;
        }

        mbuf_per_pg = pgsz / obj_sz;
        leftover = (nb_mbufs % mbuf_per_pg) > 0;
        n_pages = (nb_mbufs / mbuf_per_pg) + leftover;

        mbuf_mem = n_pages * pgsz;

        total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);

        if (total_mem > SIZE_MAX) {
                TESTPMD_LOG(ERR, "Memory size too big\n");
                return -1;
        }
        *out = (size_t)total_mem;

        return 0;
}
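
/*
 * Worked example (illustrative, with assumed sizes): for nb_mbufs = 1000000,
 * a computed obj_sz of 2560 bytes and 2 MB pages, mbuf_per_pg is
 * 2097152 / 2560 = 819, so n_pages = 1000000 / 819 + 1 = 1222 and mbuf_mem
 * is ~2.4 GB; the fixed 128 MB header allowance is then added and the sum
 * is rounded up to a page boundary.
 */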

static int
pagesz_flags(uint64_t page_sz)
{
        /* as per mmap() manpage, all page sizes are log2 of page size
         * shifted by MAP_HUGE_SHIFT
         */
        int log2 = rte_log2_u64(page_sz);

        return (log2 << HUGE_SHIFT);
}
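
/*
 * Example (illustrative): for a 2 MB page, rte_log2_u64(2097152) = 21,
 * so the value returned is 21 << 26, which matches Linux's MAP_HUGE_2MB
 * encoding of the page size in the mmap() flags.
 */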

static void *
alloc_mem(size_t memsz, size_t pgsz, bool huge)
{
        void *addr;
        int flags;

        /* allocate anonymous hugepages */
        flags = MAP_ANONYMOUS | MAP_PRIVATE;
        if (huge)
                flags |= HUGE_FLAG | pagesz_flags(pgsz);

        addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
        if (addr == MAP_FAILED)
                return NULL;

        return addr;
}

struct extmem_param {
        void *addr;
        size_t len;
        size_t pgsz;
        rte_iova_t *iova_table;
        unsigned int iova_table_len;
};

static int
create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
                bool huge)
{
        uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
                        RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
        unsigned int cur_page, n_pages, pgsz_idx;
        size_t mem_sz, cur_pgsz;
        rte_iova_t *iovas = NULL;
        void *addr;
        int ret;

        for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
                /* skip anything that is too big */
                if (pgsizes[pgsz_idx] > SIZE_MAX)
                        continue;

                cur_pgsz = pgsizes[pgsz_idx];

                /* if we were told not to allocate hugepages, override */
                if (!huge)
                        cur_pgsz = sysconf(_SC_PAGESIZE);

                ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
                if (ret < 0) {
                        TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
                        return -1;
                }

                /* allocate our memory */
                addr = alloc_mem(mem_sz, cur_pgsz, huge);

                /* if we couldn't allocate memory with a specified page size,
                 * that doesn't mean we can't do it with other page sizes, so
                 * try another one.
                 */
                if (addr == NULL)
                        continue;

                /* store IOVA addresses for every page in this memory area */
                n_pages = mem_sz / cur_pgsz;

                iovas = malloc(sizeof(*iovas) * n_pages);

                if (iovas == NULL) {
                        TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
                        goto fail;
                }
                /* lock memory if it's not huge pages */
                if (!huge)
                        mlock(addr, mem_sz);

                /* populate IOVA addresses */
                for (cur_page = 0; cur_page < n_pages; cur_page++) {
                        rte_iova_t iova;
                        size_t offset;
                        void *cur;

                        offset = cur_pgsz * cur_page;
                        cur = RTE_PTR_ADD(addr, offset);

                        /* touch the page before getting its IOVA */
                        *(volatile char *)cur = 0;

                        iova = rte_mem_virt2iova(cur);

                        iovas[cur_page] = iova;
                }

                break;
        }
        /* if we couldn't allocate anything */
        if (iovas == NULL)
                return -1;

        param->addr = addr;
        param->len = mem_sz;
        param->pgsz = cur_pgsz;
        param->iova_table = iovas;
        param->iova_table_len = n_pages;

        return 0;
fail:
        if (iovas)
                free(iovas);
        if (addr)
                munmap(addr, mem_sz);

        return -1;
}

static int
setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
{
        struct extmem_param param;
        int socket_id, ret;

        memset(&param, 0, sizeof(param));

        /* check if our heap exists */
        socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
        if (socket_id < 0) {
                /* create our heap */
                ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
                if (ret < 0) {
                        TESTPMD_LOG(ERR, "Cannot create heap\n");
                        return -1;
                }
        }

        ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
        if (ret < 0) {
                TESTPMD_LOG(ERR, "Cannot create memory area\n");
                return -1;
        }

        /* we now have a valid memory area, so add it to heap */
        ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
                        param.addr, param.len, param.iova_table,
                        param.iova_table_len, param.pgsz);

        /* when using VFIO, memory is automatically mapped for DMA by EAL */

        /* not needed any more */
        free(param.iova_table);

        if (ret < 0) {
                TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
                munmap(param.addr, param.len);
                return -1;
        }

        /* success */

        TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
                        param.len >> 20);

        return 0;
}
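
/*
 * Usage sketch (illustrative, not part of the upstream source): once the
 * external memory has been added to the "extmem" heap, it can be allocated
 * from like any other socket, e.g.:
 *
 *   int sock = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
 *   void *buf = rte_malloc_socket(NULL, len, 0, sock);
 */
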
static void
dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
             struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
{
        uint16_t pid = 0;
        int ret;

        RTE_ETH_FOREACH_DEV(pid) {
                struct rte_eth_dev_info dev_info;

                ret = eth_dev_info_get_print_err(pid, &dev_info);
                if (ret != 0) {
                        TESTPMD_LOG(DEBUG,
                                    "unable to get device info for port %d on addr 0x%p, "
                                    "mempool unmapping will not be performed\n",
                                    pid, memhdr->addr);
                        continue;
                }

                ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
                if (ret) {
                        TESTPMD_LOG(DEBUG,
                                    "unable to DMA unmap addr 0x%p "
                                    "for device %s\n",
                                    memhdr->addr, dev_info.device->name);
                }
        }
        ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
        if (ret) {
                TESTPMD_LOG(DEBUG,
                            "unable to un-register addr 0x%p\n", memhdr->addr);
        }
}

static void
dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
           struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
{
        uint16_t pid = 0;
        size_t page_size = sysconf(_SC_PAGESIZE);
        int ret;

        ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
                                  page_size);
        if (ret) {
                TESTPMD_LOG(DEBUG,
                            "unable to register addr 0x%p\n", memhdr->addr);
                return;
        }
        RTE_ETH_FOREACH_DEV(pid) {
                struct rte_eth_dev_info dev_info;

                ret = eth_dev_info_get_print_err(pid, &dev_info);
                if (ret != 0) {
                        TESTPMD_LOG(DEBUG,
                                    "unable to get device info for port %d on addr 0x%p, "
                                    "mempool mapping will not be performed\n",
                                    pid, memhdr->addr);
                        continue;
                }
                ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
                if (ret) {
                        TESTPMD_LOG(DEBUG,
                                    "unable to DMA map addr 0x%p "
                                    "for device %s\n",
                                    memhdr->addr, dev_info.device->name);
                }
        }
}
#endif

static unsigned int
setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
            char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
{
        struct rte_pktmbuf_extmem *xmem;
        unsigned int ext_num, zone_num, elt_num;
        uint16_t elt_size;

        elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
        elt_num = EXTBUF_ZONE_SIZE / elt_size;
        zone_num = (nb_mbufs + elt_num - 1) / elt_num;

        xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
        if (xmem == NULL) {
                TESTPMD_LOG(ERR, "Cannot allocate memory for "
                                 "external buffer descriptors\n");
                *ext_mem = NULL;
                return 0;
        }
        for (ext_num = 0; ext_num < zone_num; ext_num++) {
                struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
                const struct rte_memzone *mz;
                char mz_name[RTE_MEMZONE_NAMESIZE];
                int ret;

                ret = snprintf(mz_name, sizeof(mz_name),
                        RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
                if (ret < 0 || ret >= (int)sizeof(mz_name)) {
                        errno = ENAMETOOLONG;
                        ext_num = 0;
                        break;
                }
                mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
                                                 socket_id,
                                                 RTE_MEMZONE_IOVA_CONTIG |
                                                 RTE_MEMZONE_1GB |
                                                 RTE_MEMZONE_SIZE_HINT_ONLY,
                                                 EXTBUF_ZONE_SIZE);
                if (mz == NULL) {
                        /*
                         * The caller exits on external buffer creation
                         * error, so there is no need to free memzones.
                         */
                        errno = ENOMEM;
                        ext_num = 0;
                        break;
                }
                xseg->buf_ptr = mz->addr;
                xseg->buf_iova = mz->iova;
                xseg->buf_len = EXTBUF_ZONE_SIZE;
                xseg->elt_size = elt_size;
        }
        if (ext_num == 0 && xmem != NULL) {
                free(xmem);
                xmem = NULL;
        }
        *ext_mem = xmem;
        return ext_num;
}
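
/*
 * Worked example (illustrative): with mbuf_sz = 2176, the element size is
 * already a cache-line multiple, a 2 MB zone holds 2097152 / 2176 = 963
 * elements, and nb_mbufs = 100000 needs (100000 + 962) / 963 = 104 memzones.
 */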

/*
 * Configuration initialisation done once at init time.
 */
static struct rte_mempool *
mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
                 unsigned int socket_id, uint16_t size_idx)
{
        char pool_name[RTE_MEMPOOL_NAMESIZE];
        struct rte_mempool *rte_mp = NULL;
#ifndef RTE_EXEC_ENV_WINDOWS
        uint32_t mb_size;

        mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
#endif
        mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
        if (!is_proc_primary()) {
                rte_mp = rte_mempool_lookup(pool_name);
                if (rte_mp == NULL)
                        rte_exit(EXIT_FAILURE,
                                "Get mbuf pool for socket %u failed: %s\n",
                                socket_id, rte_strerror(rte_errno));
                return rte_mp;
        }

        TESTPMD_LOG(INFO,
                "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
                pool_name, nb_mbuf, mbuf_seg_size, socket_id);

        switch (mp_alloc_type) {
        case MP_ALLOC_NATIVE:
                {
                        /* wrapper to rte_mempool_create() */
                        TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
                                        rte_mbuf_best_mempool_ops());
                        rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
                                mb_mempool_cache, 0, mbuf_seg_size, socket_id);
                        break;
                }
#ifndef RTE_EXEC_ENV_WINDOWS
        case MP_ALLOC_ANON:
                {
                        rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
                                mb_size, (unsigned int) mb_mempool_cache,
                                sizeof(struct rte_pktmbuf_pool_private),
                                socket_id, mempool_flags);
                        if (rte_mp == NULL)
                                goto err;

                        if (rte_mempool_populate_anon(rte_mp) == 0) {
                                rte_mempool_free(rte_mp);
                                rte_mp = NULL;
                                goto err;
                        }
                        rte_pktmbuf_pool_init(rte_mp, NULL);
                        rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
                        rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
                        break;
                }
        case MP_ALLOC_XMEM:
        case MP_ALLOC_XMEM_HUGE:
                {
                        int heap_socket;
                        bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;

                        if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
                                rte_exit(EXIT_FAILURE, "Could not create external memory\n");

                        heap_socket =
                                rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
                        if (heap_socket < 0)
                                rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");

                        TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
                                        rte_mbuf_best_mempool_ops());
                        rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
                                        mb_mempool_cache, 0, mbuf_seg_size,
                                        heap_socket);
                        break;
                }
#endif
        case MP_ALLOC_XBUF:
                {
                        struct rte_pktmbuf_extmem *ext_mem;
                        unsigned int ext_num;

                        ext_num = setup_extbuf(nb_mbuf, mbuf_seg_size,
                                               socket_id, pool_name, &ext_mem);
                        if (ext_num == 0)
                                rte_exit(EXIT_FAILURE,
                                         "Can't create pinned data buffers\n");

                        TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
                                        rte_mbuf_best_mempool_ops());
                        rte_mp = rte_pktmbuf_pool_create_extbuf
                                        (pool_name, nb_mbuf, mb_mempool_cache,
                                         0, mbuf_seg_size, socket_id,
                                         ext_mem, ext_num);
                        free(ext_mem);
                        break;
                }
        default:
                {
                        rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
                }
        }

#ifndef RTE_EXEC_ENV_WINDOWS
err:
#endif
        if (rte_mp == NULL) {
                rte_exit(EXIT_FAILURE,
                        "Creation of mbuf pool for socket %u failed: %s\n",
                        socket_id, rte_strerror(rte_errno));
        } else if (verbose_level > 0) {
                rte_mempool_dump(stdout, rte_mp);
        }
        return rte_mp;
}

/*
 * Check whether the given socket ID is valid in NUMA mode.
 * Return 0 if valid, -1 otherwise.
 */
static int
check_socket_id(const unsigned int socket_id)
{
        static int warning_once = 0;

        if (new_socket_id(socket_id)) {
                if (!warning_once && numa_support)
                        fprintf(stderr,
                                "Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
                warning_once = 1;
                return -1;
        }
        return 0;
}

/*
 * Get the allowed maximum number of RX queues.
 * *pid returns the port ID that has the minimal value of
 * max_rx_queues among all ports.
 */
queueid_t
get_allowed_max_nb_rxq(portid_t *pid)
{
        queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
        bool max_rxq_valid = false;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                max_rxq_valid = true;
                if (dev_info.max_rx_queues < allowed_max_rxq) {
                        allowed_max_rxq = dev_info.max_rx_queues;
                        *pid = pi;
                }
        }
        return max_rxq_valid ? allowed_max_rxq : 0;
}

/*
 * Check whether the requested number of RX queues is valid.
 * It is valid if it does not exceed the maximum number of RX queues
 * of any port.
 * Return 0 if valid, -1 otherwise.
 */
int
check_nb_rxq(queueid_t rxq)
{
        queueid_t allowed_max_rxq;
        portid_t pid = 0;

        allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
        if (rxq > allowed_max_rxq) {
                fprintf(stderr,
                        "Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
                        rxq, allowed_max_rxq, pid);
                return -1;
        }
        return 0;
}

/*
 * Get the allowed maximum number of TX queues.
 * *pid returns the port ID that has the minimal value of
 * max_tx_queues among all ports.
 */
queueid_t
get_allowed_max_nb_txq(portid_t *pid)
{
        queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
        bool max_txq_valid = false;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                max_txq_valid = true;
                if (dev_info.max_tx_queues < allowed_max_txq) {
                        allowed_max_txq = dev_info.max_tx_queues;
                        *pid = pi;
                }
        }
        return max_txq_valid ? allowed_max_txq : 0;
}

/*
 * Check whether the requested number of TX queues is valid.
 * It is valid if it does not exceed the maximum number of TX queues
 * of any port.
 * Return 0 if valid, -1 otherwise.
 */
int
check_nb_txq(queueid_t txq)
{
        queueid_t allowed_max_txq;
        portid_t pid = 0;

        allowed_max_txq = get_allowed_max_nb_txq(&pid);
        if (txq > allowed_max_txq) {
                fprintf(stderr,
                        "Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
                        txq, allowed_max_txq, pid);
                return -1;
        }
        return 0;
}

/*
 * Get the allowed maximum number of RXDs of every RX queue.
 * *pid returns the port ID that has the minimal value of
 * max_rxd over all queues of all ports.
 */
static uint16_t
get_allowed_max_nb_rxd(portid_t *pid)
{
        uint16_t allowed_max_rxd = UINT16_MAX;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
                        allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
                        *pid = pi;
                }
        }
        return allowed_max_rxd;
}

/*
 * Get the allowed minimal number of RXDs of every RX queue.
 * *pid returns the port ID that has the maximal value of
 * min_rxd over all queues of all ports.
 */
static uint16_t
get_allowed_min_nb_rxd(portid_t *pid)
{
        uint16_t allowed_min_rxd = 0;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
                        allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
                        *pid = pi;
                }
        }

        return allowed_min_rxd;
}

/*
 * Check whether the requested number of RXDs is valid.
 * It is valid if it does not exceed the maximum number of RXDs of any
 * RX queue and is not less than the minimal number of RXDs of any
 * RX queue.
 * Return 0 if valid, -1 otherwise.
 */
int
check_nb_rxd(queueid_t rxd)
{
        uint16_t allowed_max_rxd;
        uint16_t allowed_min_rxd;
        portid_t pid = 0;

        allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
        if (rxd > allowed_max_rxd) {
                fprintf(stderr,
                        "Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
                        rxd, allowed_max_rxd, pid);
                return -1;
        }

        allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
        if (rxd < allowed_min_rxd) {
                fprintf(stderr,
                        "Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
                        rxd, allowed_min_rxd, pid);
                return -1;
        }

        return 0;
}

/*
 * Get the allowed maximum number of TXDs of every TX queue.
 * *pid returns the port ID that has the minimal value of
 * max_txd over all queues of all ports.
 */
static uint16_t
get_allowed_max_nb_txd(portid_t *pid)
{
        uint16_t allowed_max_txd = UINT16_MAX;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
                        allowed_max_txd = dev_info.tx_desc_lim.nb_max;
                        *pid = pi;
                }
        }
        return allowed_max_txd;
}

/*
 * Get the allowed minimal number of TXDs of every TX queue.
 * *pid returns the port ID that has the maximal value of
 * min_txd over all queues of all ports.
 */
static uint16_t
get_allowed_min_nb_txd(portid_t *pid)
{
        uint16_t allowed_min_txd = 0;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
                        allowed_min_txd = dev_info.tx_desc_lim.nb_min;
                        *pid = pi;
                }
        }

        return allowed_min_txd;
}

/*
 * Check whether the requested number of TXDs is valid.
 * It is valid if it does not exceed the maximum number of TXDs of any
 * TX queue and is not less than the minimal number of TXDs of any
 * TX queue.
 * Return 0 if valid, -1 otherwise.
 */
int
check_nb_txd(queueid_t txd)
{
        uint16_t allowed_max_txd;
        uint16_t allowed_min_txd;
        portid_t pid = 0;

        allowed_max_txd = get_allowed_max_nb_txd(&pid);
        if (txd > allowed_max_txd) {
                fprintf(stderr,
                        "Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
                        txd, allowed_max_txd, pid);
                return -1;
        }

        allowed_min_txd = get_allowed_min_nb_txd(&pid);
        if (txd < allowed_min_txd) {
                fprintf(stderr,
                        "Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
                        txd, allowed_min_txd, pid);
                return -1;
        }
        return 0;
}

/*
 * Get the allowed maximum number of hairpin queues.
 * *pid returns the port ID that has the minimal value of
 * max_hairpin_queues among all ports.
 */
queueid_t
get_allowed_max_nb_hairpinq(portid_t *pid)
{
        queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
        portid_t pi;
        struct rte_eth_hairpin_cap cap;

        RTE_ETH_FOREACH_DEV(pi) {
                if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
                        *pid = pi;
                        return 0;
                }
                if (cap.max_nb_queues < allowed_max_hairpinq) {
                        allowed_max_hairpinq = cap.max_nb_queues;
                        *pid = pi;
                }
        }
        return allowed_max_hairpinq;
}

/*
 * Check whether the requested number of hairpin queues is valid.
 * It is valid if it does not exceed the maximum number of hairpin
 * queues of any port.
 * Return 0 if valid, -1 otherwise.
 */
int
check_nb_hairpinq(queueid_t hairpinq)
{
        queueid_t allowed_max_hairpinq;
        portid_t pid = 0;

        allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
        if (hairpinq > allowed_max_hairpinq) {
                fprintf(stderr,
                        "Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
                        hairpinq, allowed_max_hairpinq, pid);
                return -1;
        }
        return 0;
}

static void
init_config_port_offloads(portid_t pid, uint32_t socket_id)
{
        struct rte_port *port = &ports[pid];
        uint16_t data_size;
        int ret;
        int i;

        eth_rx_metadata_negotiate_mp(pid);
        flow_pick_transfer_proxy_mp(pid);

        port->dev_conf.txmode = tx_mode;
        port->dev_conf.rxmode = rx_mode;

        ret = eth_dev_info_get_print_err(pid, &port->dev_info);
        if (ret != 0)
                rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");

        ret = update_jumbo_frame_offload(pid);
        if (ret != 0)
                fprintf(stderr,
                        "Updating jumbo frame offload failed for port %u\n",
                        pid);

        if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
                port->dev_conf.txmode.offloads &=
                        ~DEV_TX_OFFLOAD_MBUF_FAST_FREE;

        /* Apply Rx offloads configuration */
        for (i = 0; i < port->dev_info.max_rx_queues; i++)
                port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
        /* Apply Tx offloads configuration */
        for (i = 0; i < port->dev_info.max_tx_queues; i++)
                port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;

        if (eth_link_speed)
                port->dev_conf.link_speeds = eth_link_speed;

        /* set flag to initialize port/queue */
        port->need_reconfig = 1;
        port->need_reconfig_queues = 1;
        port->socket_id = socket_id;
        port->tx_metadata = 0;

        /*
         * Check for maximum number of segments per MTU.
         * Accordingly update the mbuf data size.
         */
        if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
            port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
                data_size = rx_mode.max_rx_pkt_len /
                        port->dev_info.rx_desc_lim.nb_mtu_seg_max;

                if ((data_size + RTE_PKTMBUF_HEADROOM) > mbuf_data_size[0]) {
                        mbuf_data_size[0] = data_size + RTE_PKTMBUF_HEADROOM;
                        TESTPMD_LOG(WARNING,
                                    "Configured mbuf size of the first segment %hu\n",
                                    mbuf_data_size[0]);
                }
        }
}
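
/*
 * Worked example (illustrative): with max_rx_pkt_len = 9600 and
 * nb_mtu_seg_max = 4, each segment must carry 2400 bytes of packet data;
 * adding RTE_PKTMBUF_HEADROOM (typically 128) gives 2528, which exceeds
 * the usual 2176-byte default of mbuf_data_size[0], so the first segment
 * size is raised to 2528.
 */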
1606
1607 static void
1608 init_config(void)
1609 {
1610         portid_t pid;
1611         struct rte_mempool *mbp;
1612         unsigned int nb_mbuf_per_pool;
1613         lcoreid_t  lc_id;
1614         struct rte_gro_param gro_param;
1615         uint32_t gso_types;
1616
1617         /* Configuration of logical cores. */
1618         fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1619                                 sizeof(struct fwd_lcore *) * nb_lcores,
1620                                 RTE_CACHE_LINE_SIZE);
1621         if (fwd_lcores == NULL) {
1622                 rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1623                                                         "failed\n", nb_lcores);
1624         }
1625         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1626                 fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1627                                                sizeof(struct fwd_lcore),
1628                                                RTE_CACHE_LINE_SIZE);
1629                 if (fwd_lcores[lc_id] == NULL) {
1630                         rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1631                                                                 "failed\n");
1632                 }
1633                 fwd_lcores[lc_id]->cpuid_idx = lc_id;
1634         }
1635
1636         RTE_ETH_FOREACH_DEV(pid) {
1637                 uint32_t socket_id;
1638
1639                 if (numa_support) {
1640                         socket_id = port_numa[pid];
1641                         if (port_numa[pid] == NUMA_NO_CONFIG) {
1642                                 socket_id = rte_eth_dev_socket_id(pid);
1643
1644                                 /*
1645                                  * if socket_id is invalid,
1646                                  * set to the first available socket.
1647                                  */
1648                                 if (check_socket_id(socket_id) < 0)
1649                                         socket_id = socket_ids[0];
1650                         }
1651                 } else {
1652                         socket_id = (socket_num == UMA_NO_CONFIG) ?
1653                                     0 : socket_num;
1654                 }
1655                 /* Apply default TxRx configuration for all ports */
1656                 init_config_port_offloads(pid, socket_id);
1657         }
1658         /*
1659          * Create the mbuf pools.
1660          * If NUMA support is disabled, create a single pool of mbufs in
1661          * socket 0 memory by default.
1662          * Otherwise, create a pool of mbufs in the memory of each socket.
1663          *
1664          * Size the pools for the maximum of nb_rxd and nb_txd here, so
1665          * that nb_rxd and nb_txd can be reconfigured at run time.
1666          */
1667         if (param_total_num_mbufs)
1668                 nb_mbuf_per_pool = param_total_num_mbufs;
1669         else {
1670                 nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1671                         (nb_lcores * mb_mempool_cache) +
1672                         RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1673                 nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1674         }
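             /*
              * Worked example (hypothetical values): with, say,
              * RTE_TEST_RX_DESC_MAX = RTE_TEST_TX_DESC_MAX = 2048,
              * MAX_PKT_BURST = 512, a 250-entry mempool cache and 4 lcores,
              * each port needs 2048 + 4 * 250 + 2048 + 512 = 5608 mbufs,
              * which is then multiplied by RTE_MAX_ETHPORTS.
              */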
1675
1676         if (numa_support) {
1677                 uint8_t i, j;
1678
1679                 for (i = 0; i < num_sockets; i++)
1680                         for (j = 0; j < mbuf_data_size_n; j++)
1681                                 mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1682                                         mbuf_pool_create(mbuf_data_size[j],
1683                                                           nb_mbuf_per_pool,
1684                                                           socket_ids[i], j);
1685         } else {
1686                 uint8_t i;
1687
1688                 for (i = 0; i < mbuf_data_size_n; i++)
1689                         mempools[i] = mbuf_pool_create
1690                                         (mbuf_data_size[i],
1691                                          nb_mbuf_per_pool,
1692                                          socket_num == UMA_NO_CONFIG ?
1693                                          0 : socket_num, i);
1694         }
1695
1696         init_port_config();
1697
1698         gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1699                 DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1700         /*
1701          * Record which mbuf pool each logical core should use, if needed.
1702          */
1703         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1704                 mbp = mbuf_pool_find(
1705                         rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1706
1707                 if (mbp == NULL)
1708                         mbp = mbuf_pool_find(0, 0);
1709                 fwd_lcores[lc_id]->mbp = mbp;
1710                 /* initialize GSO context */
1711                 fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1712                 fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1713                 fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1714                 fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1715                         RTE_ETHER_CRC_LEN;
1716                 fwd_lcores[lc_id]->gso_ctx.flag = 0;
1717         }
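             /*
              * Note: gso_size above resolves to 1514 bytes, i.e.
              * RTE_ETHER_MAX_LEN (1518) minus RTE_ETHER_CRC_LEN (4),
              * a full Ethernet frame without the CRC.
              */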
1718
1719         fwd_config_setup();
1720
1721         /* create a gro context for each lcore */
1722         gro_param.gro_types = RTE_GRO_TCP_IPV4;
1723         gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1724         gro_param.max_item_per_flow = MAX_PKT_BURST;
1725         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1726                 gro_param.socket_id = rte_lcore_to_socket_id(
1727                                 fwd_lcores_cpuids[lc_id]);
1728                 fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1729                 if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1730                         rte_exit(EXIT_FAILURE,
1731                                         "rte_gro_ctx_create() failed\n");
1732                 }
1733         }
1734 }
1735
1736
1737 void
1738 reconfig(portid_t new_port_id, unsigned socket_id)
1739 {
1740         /* Reconfiguration of Ethernet ports. */
1741         init_config_port_offloads(new_port_id, socket_id);
1742         init_port_config();
1743 }
1744
1745
1746 int
1747 init_fwd_streams(void)
1748 {
1749         portid_t pid;
1750         struct rte_port *port;
1751         streamid_t sm_id, nb_fwd_streams_new;
1752         queueid_t q;
1753
1754         /* set each port's socket id according to the NUMA configuration */
1755         RTE_ETH_FOREACH_DEV(pid) {
1756                 port = &ports[pid];
1757                 if (nb_rxq > port->dev_info.max_rx_queues) {
1758                         fprintf(stderr,
1759                                 "Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1760                                 nb_rxq, port->dev_info.max_rx_queues);
1761                         return -1;
1762                 }
1763                 if (nb_txq > port->dev_info.max_tx_queues) {
1764                         fprintf(stderr,
1765                                 "Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1766                                 nb_txq, port->dev_info.max_tx_queues);
1767                         return -1;
1768                 }
1769                 if (numa_support) {
1770                         if (port_numa[pid] != NUMA_NO_CONFIG)
1771                                 port->socket_id = port_numa[pid];
1772                         else {
1773                                 port->socket_id = rte_eth_dev_socket_id(pid);
1774
1775                                 /*
1776                                  * if socket_id is invalid,
1777                                  * set to the first available socket.
1778                                  */
1779                                 if (check_socket_id(port->socket_id) < 0)
1780                                         port->socket_id = socket_ids[0];
1781                         }
1782                 }
1783                 else {
1784                         if (socket_num == UMA_NO_CONFIG)
1785                                 port->socket_id = 0;
1786                         else
1787                                 port->socket_id = socket_num;
1788                 }
1789         }
1790
1791         q = RTE_MAX(nb_rxq, nb_txq);
1792         if (q == 0) {
1793                 fprintf(stderr,
1794                         "Fail: Cannot allocate fwd streams as number of queues is 0\n");
1795                 return -1;
1796         }
1797         nb_fwd_streams_new = (streamid_t)(nb_ports * q);
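             /*
              * Example: with nb_ports = 2, nb_rxq = 4 and nb_txq = 2,
              * q = max(4, 2) = 4, so 2 * 4 = 8 forwarding streams.
              */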
1798         if (nb_fwd_streams_new == nb_fwd_streams)
1799                 return 0;
1800         /* clear the old */
1801         if (fwd_streams != NULL) {
1802                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1803                         if (fwd_streams[sm_id] == NULL)
1804                                 continue;
1805                         rte_free(fwd_streams[sm_id]);
1806                         fwd_streams[sm_id] = NULL;
1807                 }
1808                 rte_free(fwd_streams);
1809                 fwd_streams = NULL;
1810         }
1811
1812         /* init new */
1813         nb_fwd_streams = nb_fwd_streams_new;
1814         if (nb_fwd_streams) {
1815                 fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1816                         sizeof(struct fwd_stream *) * nb_fwd_streams,
1817                         RTE_CACHE_LINE_SIZE);
1818                 if (fwd_streams == NULL)
1819                         rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1820                                  " (struct fwd_stream *)) failed\n",
1821                                  nb_fwd_streams);
1822
1823                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1824                         fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1825                                 " struct fwd_stream", sizeof(struct fwd_stream),
1826                                 RTE_CACHE_LINE_SIZE);
1827                         if (fwd_streams[sm_id] == NULL)
1828                                 rte_exit(EXIT_FAILURE, "rte_zmalloc"
1829                                          "(struct fwd_stream) failed\n");
1830                 }
1831         }
1832
1833         return 0;
1834 }
1835
1836 static void
1837 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1838 {
1839         uint64_t total_burst, sburst;
1840         uint64_t nb_burst;
1841         uint64_t burst_stats[4];
1842         uint16_t pktnb_stats[4];
1843         uint16_t nb_pkt;
1844         int burst_percent[4], sburstp;
1845         int i;
1846
1847         /*
1848          * First compute the total number of packet bursts and the
1849          * two highest numbers of bursts of the same number of packets.
1850          */
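             /*
              * The summary printed below looks like (illustrative values
              * only):
              *   RX-bursts : 15423 [90% of 0 pkts + 9% of 32 pkts + 1% of other]
              */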
1851         memset(&burst_stats, 0x0, sizeof(burst_stats));
1852         memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1853
1854         /* Always show stats for burst size 0 */
1855         total_burst = pbs->pkt_burst_spread[0];
1856         burst_stats[0] = pbs->pkt_burst_spread[0];
1857         pktnb_stats[0] = 0;
1858
1859         /* Find the next 2 burst sizes with highest occurrences. */
1860         for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1861                 nb_burst = pbs->pkt_burst_spread[nb_pkt];
1862
1863                 if (nb_burst == 0)
1864                         continue;
1865
1866                 total_burst += nb_burst;
1867
1868                 if (nb_burst > burst_stats[1]) {
1869                         burst_stats[2] = burst_stats[1];
1870                         pktnb_stats[2] = pktnb_stats[1];
1871                         burst_stats[1] = nb_burst;
1872                         pktnb_stats[1] = nb_pkt;
1873                 } else if (nb_burst > burst_stats[2]) {
1874                         burst_stats[2] = nb_burst;
1875                         pktnb_stats[2] = nb_pkt;
1876                 }
1877         }
1878         if (total_burst == 0)
1879                 return;
1880
1881         printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1882         for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1883                 if (i == 3) {
1884                         printf("%d%% of other]\n", 100 - sburstp);
1885                         return;
1886                 }
1887
1888                 sburst += burst_stats[i];
1889                 if (sburst == total_burst) {
1890                         printf("%d%% of %d pkts]\n",
1891                                 100 - sburstp, (int) pktnb_stats[i]);
1892                         return;
1893                 }
1894
1895                 burst_percent[i] =
1896                         (double)burst_stats[i] / total_burst * 100;
1897                 printf("%d%% of %d pkts + ",
1898                         burst_percent[i], (int) pktnb_stats[i]);
1899                 sburstp += burst_percent[i];
1900         }
1901 }
1902
1903 static void
1904 fwd_stream_stats_display(streamid_t stream_id)
1905 {
1906         struct fwd_stream *fs;
1907         static const char *fwd_top_stats_border = "-------";
1908
1909         fs = fwd_streams[stream_id];
1910         if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1911             (fs->fwd_dropped == 0))
1912                 return;
1913         printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1914                "TX Port=%2d/Queue=%2d %s\n",
1915                fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1916                fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1917         printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1918                " TX-dropped: %-14"PRIu64,
1919                fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1920
1921         /* if in checksum forwarding mode */
1922         if (cur_fwd_eng == &csum_fwd_engine) {
1923                 printf("  RX- bad IP checksum: %-14"PRIu64
1924                        "  RX- bad L4 checksum: %-14"PRIu64
1925                        " RX- bad outer L4 checksum: %-14"PRIu64"\n",
1926                         fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1927                         fs->rx_bad_outer_l4_csum);
1928                 printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1929                         fs->rx_bad_outer_ip_csum);
1930         } else {
1931                 printf("\n");
1932         }
1933
1934         if (record_burst_stats) {
1935                 pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1936                 pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1937         }
1938 }
1939
1940 void
1941 fwd_stats_display(void)
1942 {
1943         static const char *fwd_stats_border = "----------------------";
1944         static const char *acc_stats_border = "+++++++++++++++";
1945         struct {
1946                 struct fwd_stream *rx_stream;
1947                 struct fwd_stream *tx_stream;
1948                 uint64_t tx_dropped;
1949                 uint64_t rx_bad_ip_csum;
1950                 uint64_t rx_bad_l4_csum;
1951                 uint64_t rx_bad_outer_l4_csum;
1952                 uint64_t rx_bad_outer_ip_csum;
1953         } ports_stats[RTE_MAX_ETHPORTS];
1954         uint64_t total_rx_dropped = 0;
1955         uint64_t total_tx_dropped = 0;
1956         uint64_t total_rx_nombuf = 0;
1957         struct rte_eth_stats stats;
1958         uint64_t fwd_cycles = 0;
1959         uint64_t total_recv = 0;
1960         uint64_t total_xmit = 0;
1961         struct rte_port *port;
1962         streamid_t sm_id;
1963         portid_t pt_id;
1964         int i;
1965
1966         memset(ports_stats, 0, sizeof(ports_stats));
1967
1968         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1969                 struct fwd_stream *fs = fwd_streams[sm_id];
1970
1971                 if (cur_fwd_config.nb_fwd_streams >
1972                     cur_fwd_config.nb_fwd_ports) {
1973                         fwd_stream_stats_display(sm_id);
1974                 } else {
1975                         ports_stats[fs->tx_port].tx_stream = fs;
1976                         ports_stats[fs->rx_port].rx_stream = fs;
1977                 }
1978
1979                 ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1980
1981                 ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1982                 ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1983                 ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1984                                 fs->rx_bad_outer_l4_csum;
1985                 ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
1986                                 fs->rx_bad_outer_ip_csum;
1987
1988                 if (record_core_cycles)
1989                         fwd_cycles += fs->core_cycles;
1990         }
1991         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1992                 pt_id = fwd_ports_ids[i];
1993                 port = &ports[pt_id];
1994
1995                 rte_eth_stats_get(pt_id, &stats);
1996                 stats.ipackets -= port->stats.ipackets;
1997                 stats.opackets -= port->stats.opackets;
1998                 stats.ibytes -= port->stats.ibytes;
1999                 stats.obytes -= port->stats.obytes;
2000                 stats.imissed -= port->stats.imissed;
2001                 stats.oerrors -= port->stats.oerrors;
2002                 stats.rx_nombuf -= port->stats.rx_nombuf;
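                     /*
                      * The subtractions above convert the absolute device
                      * counters into deltas since fwd_stats_reset(), which
                      * snapshots the counters into port->stats when
                      * forwarding starts.
                      */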
2003
2004                 total_recv += stats.ipackets;
2005                 total_xmit += stats.opackets;
2006                 total_rx_dropped += stats.imissed;
2007                 total_tx_dropped += ports_stats[pt_id].tx_dropped;
2008                 total_tx_dropped += stats.oerrors;
2009                 total_rx_nombuf  += stats.rx_nombuf;
2010
2011                 printf("\n  %s Forward statistics for port %-2d %s\n",
2012                        fwd_stats_border, pt_id, fwd_stats_border);
2013
2014                 printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2015                        "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2016                        stats.ipackets + stats.imissed);
2017
2018                 if (cur_fwd_eng == &csum_fwd_engine) {
2019                         printf("  Bad-ipcsum: %-14"PRIu64
2020                                " Bad-l4csum: %-14"PRIu64
2021                                "Bad-outer-l4csum: %-14"PRIu64"\n",
2022                                ports_stats[pt_id].rx_bad_ip_csum,
2023                                ports_stats[pt_id].rx_bad_l4_csum,
2024                                ports_stats[pt_id].rx_bad_outer_l4_csum);
2025                         printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2026                                ports_stats[pt_id].rx_bad_outer_ip_csum);
2027                 }
2028                 if (stats.ierrors + stats.rx_nombuf > 0) {
2029                         printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2030                         printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2031                 }
2032
2033                 printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2034                        "TX-total: %-"PRIu64"\n",
2035                        stats.opackets, ports_stats[pt_id].tx_dropped,
2036                        stats.opackets + ports_stats[pt_id].tx_dropped);
2037
2038                 if (record_burst_stats) {
2039                         if (ports_stats[pt_id].rx_stream)
2040                                 pkt_burst_stats_display("RX",
2041                                         &ports_stats[pt_id].rx_stream->rx_burst_stats);
2042                         if (ports_stats[pt_id].tx_stream)
2043                                 pkt_burst_stats_display("TX",
2044                                 &ports_stats[pt_id].tx_stream->tx_burst_stats);
2045                 }
2046
2047                 printf("  %s--------------------------------%s\n",
2048                        fwd_stats_border, fwd_stats_border);
2049         }
2050
2051         printf("\n  %s Accumulated forward statistics for all ports"
2052                "%s\n",
2053                acc_stats_border, acc_stats_border);
2054         printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2055                "%-"PRIu64"\n"
2056                "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2057                "%-"PRIu64"\n",
2058                total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2059                total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2060         if (total_rx_nombuf > 0)
2061                 printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2062         printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2063                "%s\n",
2064                acc_stats_border, acc_stats_border);
2065         if (record_core_cycles) {
2066 #define CYC_PER_MHZ 1E6
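                     /*
                      * Worked example (hypothetical values): fwd_cycles = 1e9
                      * and total_pkts = 5e7 on a 2.0 GHz TSC would print
                      * "CPU cycles/packet=20.00 ... at 2000 MHz Clock",
                      * since rte_get_tsc_hz() / CYC_PER_MHZ = 2e9 / 1e6.
                      */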
2067                 if (total_recv > 0 || total_xmit > 0) {
2068                         uint64_t total_pkts = 0;
2069                         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2070                             strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2071                                 total_pkts = total_xmit;
2072                         else
2073                                 total_pkts = total_recv;
2074
2075                         printf("\n  CPU cycles/packet=%.2F (total cycles="
2076                                "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2077                                " MHz Clock\n",
2078                                (double) fwd_cycles / total_pkts,
2079                                fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2080                                (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2081                 }
2082         }
2083 }
2084
2085 void
2086 fwd_stats_reset(void)
2087 {
2088         streamid_t sm_id;
2089         portid_t pt_id;
2090         int i;
2091
2092         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2093                 pt_id = fwd_ports_ids[i];
2094                 rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2095         }
2096         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2097                 struct fwd_stream *fs = fwd_streams[sm_id];
2098
2099                 fs->rx_packets = 0;
2100                 fs->tx_packets = 0;
2101                 fs->fwd_dropped = 0;
2102                 fs->rx_bad_ip_csum = 0;
2103                 fs->rx_bad_l4_csum = 0;
2104                 fs->rx_bad_outer_l4_csum = 0;
2105                 fs->rx_bad_outer_ip_csum = 0;
2106
2107                 memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2108                 memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2109                 fs->core_cycles = 0;
2110         }
2111 }
2112
2113 static void
2114 flush_fwd_rx_queues(void)
2115 {
2116         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2117         portid_t  rxp;
2118         portid_t port_id;
2119         queueid_t rxq;
2120         uint16_t  nb_rx;
2121         uint16_t  i;
2122         uint8_t   j;
2123         uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2124         uint64_t timer_period;
2125
2126         if (num_procs > 1) {
2127                 printf("multi-process not support for flushing fwd Rx queues, skip the below lines and return.\n");
2128                 return;
2129         }
2130
2131         /* convert to number of cycles */
2132         timer_period = rte_get_timer_hz(); /* 1 second timeout */
2133
2134         for (j = 0; j < 2; j++) {
2135                 for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2136                         for (rxq = 0; rxq < nb_rxq; rxq++) {
2137                                 port_id = fwd_ports_ids[rxp];
2138                                 /*
2139                                  * testpmd can get stuck in the do-while
2140                                  * loop below if rte_eth_rx_burst() keeps
2141                                  * returning packets, so a timer forces an
2142                                  * exit from the loop after 1 second.
2143                                  */
2144                                 prev_tsc = rte_rdtsc();
2145                                 do {
2146                                         nb_rx = rte_eth_rx_burst(port_id, rxq,
2147                                                 pkts_burst, MAX_PKT_BURST);
2148                                         for (i = 0; i < nb_rx; i++)
2149                                                 rte_pktmbuf_free(pkts_burst[i]);
2150
2151                                         cur_tsc = rte_rdtsc();
2152                                         diff_tsc = cur_tsc - prev_tsc;
2153                                         timer_tsc += diff_tsc;
2154                                 } while ((nb_rx > 0) &&
2155                                         (timer_tsc < timer_period));
2156                                 timer_tsc = 0;
2157                         }
2158                 }
2159                 rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2160         }
2161 }
2162
2163 static void
2164 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2165 {
2166         struct fwd_stream **fsm;
2167         streamid_t nb_fs;
2168         streamid_t sm_id;
2169 #ifdef RTE_LIB_BITRATESTATS
2170         uint64_t tics_per_1sec;
2171         uint64_t tics_datum;
2172         uint64_t tics_current;
2173         uint16_t i, cnt_ports;
2174
2175         cnt_ports = nb_ports;
2176         tics_datum = rte_rdtsc();
2177         tics_per_1sec = rte_get_timer_hz();
2178 #endif
2179         fsm = &fwd_streams[fc->stream_idx];
2180         nb_fs = fc->stream_nb;
2181         do {
2182                 for (sm_id = 0; sm_id < nb_fs; sm_id++)
2183                         (*pkt_fwd)(fsm[sm_id]);
2184 #ifdef RTE_LIB_BITRATESTATS
2185                 if (bitrate_enabled != 0 &&
2186                                 bitrate_lcore_id == rte_lcore_id()) {
2187                         tics_current = rte_rdtsc();
2188                         if (tics_current - tics_datum >= tics_per_1sec) {
2189                                 /* Periodic bitrate calculation */
2190                                 for (i = 0; i < cnt_ports; i++)
2191                                         rte_stats_bitrate_calc(bitrate_data,
2192                                                 ports_ids[i]);
2193                                 tics_datum = tics_current;
2194                         }
2195                 }
2196 #endif
2197 #ifdef RTE_LIB_LATENCYSTATS
2198                 if (latencystats_enabled != 0 &&
2199                                 latencystats_lcore_id == rte_lcore_id())
2200                         rte_latencystats_update();
2201 #endif
2202
2203         } while (! fc->stopped);
2204 }
2205
2206 static int
2207 start_pkt_forward_on_core(void *fwd_arg)
2208 {
2209         run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2210                              cur_fwd_config.fwd_eng->packet_fwd);
2211         return 0;
2212 }
2213
2214 /*
2215  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2216  * Used to start communication flows in network loopback test configurations.
2217  */
2218 static int
2219 run_one_txonly_burst_on_core(void *fwd_arg)
2220 {
2221         struct fwd_lcore *fwd_lc;
2222         struct fwd_lcore tmp_lcore;
2223
2224         fwd_lc = (struct fwd_lcore *) fwd_arg;
2225         tmp_lcore = *fwd_lc;
2226         tmp_lcore.stopped = 1;
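             /*
              * With stopped already set, the do-while loop in
              * run_pkt_fwd_on_lcore() runs its body exactly once, so each
              * stream sends a single burst.
              */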
2227         run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2228         return 0;
2229 }
2230
2231 /*
2232  * Launch packet forwarding:
2233  *     - Setup per-port forwarding context.
2234  *     - launch logical cores with their forwarding configuration.
2235  */
2236 static void
2237 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2238 {
2239         unsigned int i;
2240         unsigned int lc_id;
2241         int diag;
2242
2243         for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2244                 lc_id = fwd_lcores_cpuids[i];
2245                 if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2246                         fwd_lcores[i]->stopped = 0;
2247                         diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2248                                                      fwd_lcores[i], lc_id);
2249                         if (diag != 0)
2250                                 fprintf(stderr,
2251                                         "launch lcore %u failed - diag=%d\n",
2252                                         lc_id, diag);
2253                 }
2254         }
2255 }
2256
2257 /*
2258  * Launch packet forwarding configuration.
2259  */
2260 void
2261 start_packet_forwarding(int with_tx_first)
2262 {
2263         port_fwd_begin_t port_fwd_begin;
2264         port_fwd_end_t  port_fwd_end;
2265         unsigned int i;
2266
2267         if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2268                 rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2269
2270         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2271                 rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2272
2273         if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2274                 strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2275                 (!nb_rxq || !nb_txq))
2276                 rte_exit(EXIT_FAILURE,
2277                         "Either rxq or txq are 0, cannot use %s fwd mode\n",
2278                         cur_fwd_eng->fwd_mode_name);
2279
2280         if (all_ports_started() == 0) {
2281                 fprintf(stderr, "Not all ports were started\n");
2282                 return;
2283         }
2284         if (test_done == 0) {
2285                 fprintf(stderr, "Packet forwarding already started\n");
2286                 return;
2287         }
2288
2289         fwd_config_setup();
2290
2291         port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2292         if (port_fwd_begin != NULL) {
2293                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2294                         if (port_fwd_begin(fwd_ports_ids[i])) {
2295                                 fprintf(stderr,
2296                                         "Packet forwarding is not ready\n");
2297                                 return;
2298                         }
2299                 }
2300         }
2301
2302         if (with_tx_first) {
2303                 port_fwd_begin = tx_only_engine.port_fwd_begin;
2304                 if (port_fwd_begin != NULL) {
2305                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2306                                 if (port_fwd_begin(fwd_ports_ids[i])) {
2307                                         fprintf(stderr,
2308                                                 "Packet forwarding is not ready\n");
2309                                         return;
2310                                 }
2311                         }
2312                 }
2313         }
2314
2315         test_done = 0;
2316
2317         if (!no_flush_rx)
2318                 flush_fwd_rx_queues();
2319
2320         pkt_fwd_config_display(&cur_fwd_config);
2321         rxtx_config_display();
2322
2323         fwd_stats_reset();
2324         if (with_tx_first) {
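                     /*
                      * with_tx_first is the number of initial TXONLY bursts
                      * (e.g. from "start tx_first <n>"); each iteration below
                      * launches one burst on every forwarding lcore and waits
                      * for all lcores before the next iteration.
                      */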
2325                 while (with_tx_first--) {
2326                         launch_packet_forwarding(
2327                                         run_one_txonly_burst_on_core);
2328                         rte_eal_mp_wait_lcore();
2329                 }
2330                 port_fwd_end = tx_only_engine.port_fwd_end;
2331                 if (port_fwd_end != NULL) {
2332                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2333                                 (*port_fwd_end)(fwd_ports_ids[i]);
2334                 }
2335         }
2336         launch_packet_forwarding(start_pkt_forward_on_core);
2337 }
2338
2339 void
2340 stop_packet_forwarding(void)
2341 {
2342         port_fwd_end_t port_fwd_end;
2343         lcoreid_t lc_id;
2344         portid_t pt_id;
2345         int i;
2346
2347         if (test_done) {
2348                 fprintf(stderr, "Packet forwarding not started\n");
2349                 return;
2350         }
2351         printf("Telling cores to stop...");
2352         for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2353                 fwd_lcores[lc_id]->stopped = 1;
2354         printf("\nWaiting for lcores to finish...\n");
2355         rte_eal_mp_wait_lcore();
2356         port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2357         if (port_fwd_end != NULL) {
2358                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2359                         pt_id = fwd_ports_ids[i];
2360                         (*port_fwd_end)(pt_id);
2361                 }
2362         }
2363
2364         fwd_stats_display();
2365
2366         printf("\nDone.\n");
2367         test_done = 1;
2368 }
2369
2370 void
2371 dev_set_link_up(portid_t pid)
2372 {
2373         if (rte_eth_dev_set_link_up(pid) < 0)
2374                 fprintf(stderr, "\nSet link up fail.\n");
2375 }
2376
2377 void
2378 dev_set_link_down(portid_t pid)
2379 {
2380         if (rte_eth_dev_set_link_down(pid) < 0)
2381                 fprintf(stderr, "\nSet link down fail.\n");
2382 }
2383
2384 static int
2385 all_ports_started(void)
2386 {
2387         portid_t pi;
2388         struct rte_port *port;
2389
2390         RTE_ETH_FOREACH_DEV(pi) {
2391                 port = &ports[pi];
2392                 /* Check if there is a port which is not started */
2393                 if ((port->port_status != RTE_PORT_STARTED) &&
2394                         (port->slave_flag == 0))
2395                         return 0;
2396         }
2397
2398         /* All ports are started */
2399         return 1;
2400 }
2401
2402 int
2403 port_is_stopped(portid_t port_id)
2404 {
2405         struct rte_port *port = &ports[port_id];
2406
2407         if ((port->port_status != RTE_PORT_STOPPED) &&
2408             (port->slave_flag == 0))
2409                 return 0;
2410         return 1;
2411 }
2412
2413 int
2414 all_ports_stopped(void)
2415 {
2416         portid_t pi;
2417
2418         RTE_ETH_FOREACH_DEV(pi) {
2419                 if (!port_is_stopped(pi))
2420                         return 0;
2421         }
2422
2423         return 1;
2424 }
2425
2426 int
2427 port_is_started(portid_t port_id)
2428 {
2429         if (port_id_is_invalid(port_id, ENABLED_WARN))
2430                 return 0;
2431
2432         if (ports[port_id].port_status != RTE_PORT_STARTED)
2433                 return 0;
2434
2435         return 1;
2436 }
2437
2438 /* Configure the Rx and Tx hairpin queues for the selected port. */
2439 static int
2440 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2441 {
2442         queueid_t qi;
2443         struct rte_eth_hairpin_conf hairpin_conf = {
2444                 .peer_count = 1,
2445         };
2446         int i;
2447         int diag;
2448         struct rte_port *port = &ports[pi];
2449         uint16_t peer_rx_port = pi;
2450         uint16_t peer_tx_port = pi;
2451         uint32_t manual = 1;
2452         uint32_t tx_exp = hairpin_mode & 0x10;
2453
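             /*
              * hairpin_mode bits, as consumed below: no low bits set means
              * each port hairpins to itself; bit 0 chains each port's Tx to
              * the next port's Rx in a loop; bit 1 binds ports in pairs;
              * bit 4 (0x10) requests explicit Tx flow mode.
              */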
2454         if (!(hairpin_mode & 0xf)) {
2455                 peer_rx_port = pi;
2456                 peer_tx_port = pi;
2457                 manual = 0;
2458         } else if (hairpin_mode & 0x1) {
2459                 peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2460                                                        RTE_ETH_DEV_NO_OWNER);
2461                 if (peer_tx_port >= RTE_MAX_ETHPORTS)
2462                         peer_tx_port = rte_eth_find_next_owned_by(0,
2463                                                 RTE_ETH_DEV_NO_OWNER);
2464                 if (p_pi != RTE_MAX_ETHPORTS) {
2465                         peer_rx_port = p_pi;
2466                 } else {
2467                         uint16_t next_pi;
2468
2469                         /* Last port will be the peer RX port of the first. */
2470                         RTE_ETH_FOREACH_DEV(next_pi)
2471                                 peer_rx_port = next_pi;
2472                 }
2473                 manual = 1;
2474         } else if (hairpin_mode & 0x2) {
2475                 if (cnt_pi & 0x1) {
2476                         peer_rx_port = p_pi;
2477                 } else {
2478                         peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2479                                                 RTE_ETH_DEV_NO_OWNER);
2480                         if (peer_rx_port >= RTE_MAX_ETHPORTS)
2481                                 peer_rx_port = pi;
2482                 }
2483                 peer_tx_port = peer_rx_port;
2484                 manual = 1;
2485         }
2486
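             /*
              * Queue index layout, e.g. with nb_rxq = nb_txq = 2 and
              * nb_hairpinq = 2: the loop below sets up Tx hairpin queues
              * 2..3, peering them with remote Rx queues i + nb_rxq = 2..3;
              * the second loop mirrors this for the Rx hairpin queues.
              */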
2487         for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2488                 hairpin_conf.peers[0].port = peer_rx_port;
2489                 hairpin_conf.peers[0].queue = i + nb_rxq;
2490                 hairpin_conf.manual_bind = !!manual;
2491                 hairpin_conf.tx_explicit = !!tx_exp;
2492                 diag = rte_eth_tx_hairpin_queue_setup
2493                         (pi, qi, nb_txd, &hairpin_conf);
2494                 i++;
2495                 if (diag == 0)
2496                         continue;
2497
2498                 /* Failed to set up a Tx hairpin queue, return */
2499                 if (rte_atomic16_cmpset(&(port->port_status),
2500                                         RTE_PORT_HANDLING,
2501                                         RTE_PORT_STOPPED) == 0)
2502                         fprintf(stderr,
2503                                 "Port %d can not be set back to stopped\n", pi);
2504                 fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2505                         pi);
2506                 /* try to reconfigure queues next time */
2507                 port->need_reconfig_queues = 1;
2508                 return -1;
2509         }
2510         for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2511                 hairpin_conf.peers[0].port = peer_tx_port;
2512                 hairpin_conf.peers[0].queue = i + nb_txq;
2513                 hairpin_conf.manual_bind = !!manual;
2514                 hairpin_conf.tx_explicit = !!tx_exp;
2515                 diag = rte_eth_rx_hairpin_queue_setup
2516                         (pi, qi, nb_rxd, &hairpin_conf);
2517                 i++;
2518                 if (diag == 0)
2519                         continue;
2520
2521                 /* Failed to set up an Rx hairpin queue, return */
2522                 if (rte_atomic16_cmpset(&(port->port_status),
2523                                         RTE_PORT_HANDLING,
2524                                         RTE_PORT_STOPPED) == 0)
2525                         fprintf(stderr,
2526                                 "Port %d can not be set back to stopped\n", pi);
2527                 fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2528                         pi);
2529                 /* try to reconfigure queues next time */
2530                 port->need_reconfig_queues = 1;
2531                 return -1;
2532         }
2533         return 0;
2534 }
2535
2536 /* Configure an Rx queue, with optional buffer split. */
2537 int
2538 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2539                uint16_t nb_rx_desc, unsigned int socket_id,
2540                struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2541 {
2542         union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2543         unsigned int i, mp_n;
2544         int ret;
2545
2546         if (rx_pkt_nb_segs <= 1 ||
2547             (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2548                 rx_conf->rx_seg = NULL;
2549                 rx_conf->rx_nseg = 0;
2550                 ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2551                                              nb_rx_desc, socket_id,
2552                                              rx_conf, mp);
2553                 return ret;
2554         }
2555         for (i = 0; i < rx_pkt_nb_segs; i++) {
2556                 struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2557                 struct rte_mempool *mpx;
2558                 /*
2559                  * Use the last valid pool for any segment whose index
2560                  * exceeds the number of configured pools.
2561                  */
2562                 mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2563                 mpx = mbuf_pool_find(socket_id, mp_n);
2564                 /* A zero segment length means: use the mbuf data buffer size. */
2565                 rx_seg->length = rx_pkt_seg_lengths[i] ?
2566                                    rx_pkt_seg_lengths[i] :
2567                                    mbuf_data_size[mp_n];
2568                 rx_seg->offset = i < rx_pkt_nb_offs ?
2569                                    rx_pkt_seg_offsets[i] : 0;
2570                 rx_seg->mp = mpx ? mpx : mp;
2571         }
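             /*
              * Illustrative split (hypothetical values, assuming two
              * mempools): rx_pkt_nb_segs = 2 with seg lengths {128, 0}
              * puts 128 bytes in a segment from pool 0, and the second
              * segment falls back to mbuf_data_size[1] from pool 1, with
              * zero offsets unless rx_pkt_seg_offsets says otherwise.
              */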
2572         rx_conf->rx_nseg = rx_pkt_nb_segs;
2573         rx_conf->rx_seg = rx_useg;
2574         ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2575                                     socket_id, rx_conf, NULL);
2576         rx_conf->rx_seg = NULL;
2577         rx_conf->rx_nseg = 0;
2578         return ret;
2579 }
2580
2581 static int
2582 alloc_xstats_display_info(portid_t pi)
2583 {
2584         uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2585         uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2586         uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2587
2588         if (xstats_display_num == 0)
2589                 return 0;
2590
2591         *ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2592         if (*ids_supp == NULL)
2593                 goto fail_ids_supp;
2594
2595         *prev_values = calloc(xstats_display_num,
2596                               sizeof(**prev_values));
2597         if (*prev_values == NULL)
2598                 goto fail_prev_values;
2599
2600         *curr_values = calloc(xstats_display_num,
2601                               sizeof(**curr_values));
2602         if (*curr_values == NULL)
2603                 goto fail_curr_values;
2604
2605         ports[pi].xstats_info.allocated = true;
2606
2607         return 0;
2608
2609 fail_curr_values:
2610         free(*prev_values);
2611 fail_prev_values:
2612         free(*ids_supp);
2613 fail_ids_supp:
2614         return -ENOMEM;
2615 }
2616
2617 static void
2618 free_xstats_display_info(portid_t pi)
2619 {
2620         if (!ports[pi].xstats_info.allocated)
2621                 return;
2622         free(ports[pi].xstats_info.ids_supp);
2623         free(ports[pi].xstats_info.prev_values);
2624         free(ports[pi].xstats_info.curr_values);
2625         ports[pi].xstats_info.allocated = false;
2626 }
2627
2628 /** Fill helper structures for specified port to show extended statistics. */
2629 static void
2630 fill_xstats_display_info_for_port(portid_t pi)
2631 {
2632         unsigned int stat, stat_supp;
2633         const char *xstat_name;
2634         struct rte_port *port;
2635         uint64_t *ids_supp;
2636         int rc;
2637
2638         if (xstats_display_num == 0)
2639                 return;
2640
2641         if (pi == (portid_t)RTE_PORT_ALL) {
2642                 fill_xstats_display_info();
2643                 return;
2644         }
2645
2646         port = &ports[pi];
2647         if (port->port_status != RTE_PORT_STARTED)
2648                 return;
2649
2650         if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2651                 rte_exit(EXIT_FAILURE,
2652                          "Failed to allocate xstats display memory\n");
2653
2654         ids_supp = port->xstats_info.ids_supp;
2655         for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2656                 xstat_name = xstats_display[stat].name;
2657                 rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2658                                                    ids_supp + stat_supp);
2659                 if (rc != 0) {
2660                         fprintf(stderr, "No xstat '%s' on port %u - skip it %u\n",
2661                                 xstat_name, pi, stat);
2662                         continue;
2663                 }
2664                 stat_supp++;
2665         }
2666
2667         port->xstats_info.ids_supp_sz = stat_supp;
2668 }
2669
2670 /** Fill helper structures for all ports to show extended statistics. */
2671 static void
2672 fill_xstats_display_info(void)
2673 {
2674         portid_t pi;
2675
2676         if (xstats_display_num == 0)
2677                 return;
2678
2679         RTE_ETH_FOREACH_DEV(pi)
2680                 fill_xstats_display_info_for_port(pi);
2681 }
2682
2683 int
2684 start_port(portid_t pid)
2685 {
2686         int diag, need_check_link_status = -1;
2687         portid_t pi;
2688         portid_t p_pi = RTE_MAX_ETHPORTS;
2689         portid_t pl[RTE_MAX_ETHPORTS];
2690         portid_t peer_pl[RTE_MAX_ETHPORTS];
2691         uint16_t cnt_pi = 0;
2692         uint16_t cfg_pi = 0;
2693         int peer_pi;
2694         queueid_t qi;
2695         struct rte_port *port;
2696         struct rte_eth_hairpin_cap cap;
2697
2698         if (port_id_is_invalid(pid, ENABLED_WARN))
2699                 return 0;
2700
2701         RTE_ETH_FOREACH_DEV(pi) {
2702                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2703                         continue;
2704
2705                 need_check_link_status = 0;
2706                 port = &ports[pi];
2707                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2708                                                  RTE_PORT_HANDLING) == 0) {
2709                         fprintf(stderr, "Port %d is now not stopped\n", pi);
2710                         continue;
2711                 }
2712
2713                 if (port->need_reconfig > 0) {
2714                         struct rte_eth_conf dev_conf;
2715                         int k;
2716
2717                         port->need_reconfig = 0;
2718
2719                         if (flow_isolate_all) {
2720                                 int ret = port_flow_isolate(pi, 1);
2721                                 if (ret) {
2722                                         fprintf(stderr,
2723                                                 "Failed to apply isolated mode on port %d\n",
2724                                                 pi);
2725                                         return -1;
2726                                 }
2727                         }
2728                         configure_rxtx_dump_callbacks(0);
2729                         printf("Configuring Port %d (socket %u)\n", pi,
2730                                         port->socket_id);
2731                         if (nb_hairpinq > 0 &&
2732                             rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2733                                 fprintf(stderr,
2734                                         "Port %d doesn't support hairpin queues\n",
2735                                         pi);
2736                                 return -1;
2737                         }
2738                         /* configure port */
2739                         diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2740                                                      nb_txq + nb_hairpinq,
2741                                                      &(port->dev_conf));
2742                         if (diag != 0) {
2743                                 if (rte_atomic16_cmpset(&(port->port_status),
2744                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2745                                         fprintf(stderr,
2746                                                 "Port %d can not be set back to stopped\n",
2747                                                 pi);
2748                                 fprintf(stderr, "Fail to configure port %d\n",
2749                                         pi);
2750                                 /* try to reconfigure port next time */
2751                                 port->need_reconfig = 1;
2752                                 return -1;
2753                         }
2754                         /* get device configuration */
2755                         if (0 !=
2756                                 eth_dev_conf_get_print_err(pi, &dev_conf)) {
2757                                 fprintf(stderr,
2758                                         "port %d can not get device configuration\n",
2759                                         pi);
2760                                 return -1;
2761                         }
2762                         /* Apply Rx offloads configuration */
2763                         if (dev_conf.rxmode.offloads !=
2764                             port->dev_conf.rxmode.offloads) {
2765                                 port->dev_conf.rxmode.offloads |=
2766                                         dev_conf.rxmode.offloads;
2767                                 for (k = 0;
2768                                      k < port->dev_info.max_rx_queues;
2769                                      k++)
2770                                         port->rx_conf[k].offloads |=
2771                                                 dev_conf.rxmode.offloads;
2772                         }
2773                         /* Apply Tx offloads configuration */
2774                         if (dev_conf.txmode.offloads !=
2775                             port->dev_conf.txmode.offloads) {
2776                                 port->dev_conf.txmode.offloads |=
2777                                         dev_conf.txmode.offloads;
2778                                 for (k = 0;
2779                                      k < port->dev_info.max_tx_queues;
2780                                      k++)
2781                                         port->tx_conf[k].offloads |=
2782                                                 dev_conf.txmode.offloads;
2783                         }
2784                 }
2785                 if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2786                         port->need_reconfig_queues = 0;
2787                         /* setup tx queues */
2788                         for (qi = 0; qi < nb_txq; qi++) {
2789                                 if ((numa_support) &&
2790                                         (txring_numa[pi] != NUMA_NO_CONFIG))
2791                                         diag = rte_eth_tx_queue_setup(pi, qi,
2792                                                 port->nb_tx_desc[qi],
2793                                                 txring_numa[pi],
2794                                                 &(port->tx_conf[qi]));
2795                                 else
2796                                         diag = rte_eth_tx_queue_setup(pi, qi,
2797                                                 port->nb_tx_desc[qi],
2798                                                 port->socket_id,
2799                                                 &(port->tx_conf[qi]));
2800
2801                                 if (diag == 0)
2802                                         continue;
2803
2804                                 /* Failed to set up a Tx queue, return */
2805                                 if (rte_atomic16_cmpset(&(port->port_status),
2806                                                         RTE_PORT_HANDLING,
2807                                                         RTE_PORT_STOPPED) == 0)
2808                                         fprintf(stderr,
2809                                                 "Port %d can not be set back to stopped\n",
2810                                                 pi);
2811                                 fprintf(stderr,
2812                                         "Fail to configure port %d tx queues\n",
2813                                         pi);
2814                                 /* try to reconfigure queues next time */
2815                                 port->need_reconfig_queues = 1;
2816                                 return -1;
2817                         }
2818                         for (qi = 0; qi < nb_rxq; qi++) {
2819                                 /* setup rx queues */
2820                                 if ((numa_support) &&
2821                                         (rxring_numa[pi] != NUMA_NO_CONFIG)) {
2822                                         struct rte_mempool *mp =
2823                                                 mbuf_pool_find
2824                                                         (rxring_numa[pi], 0);
2825                                         if (mp == NULL) {
2826                                                 fprintf(stderr,
2827                                                         "Failed to setup RX queue: No mempool allocation on the socket %d\n",
2828                                                         rxring_numa[pi]);
2829                                                 return -1;
2830                                         }
2831
2832                                         diag = rx_queue_setup(pi, qi,
2833                                              port->nb_rx_desc[qi],
2834                                              rxring_numa[pi],
2835                                              &(port->rx_conf[qi]),
2836                                              mp);
2837                                 } else {
2838                                         struct rte_mempool *mp =
2839                                                 mbuf_pool_find
2840                                                         (port->socket_id, 0);
2841                                         if (mp == NULL) {
2842                                                 fprintf(stderr,
2843                                                         "Failed to setup RX queue: No mempool allocation on the socket %d\n",
2844                                                         port->socket_id);
2845                                                 return -1;
2846                                         }
2847                                         diag = rx_queue_setup(pi, qi,
2848                                              port->nb_rx_desc[qi],
2849                                              port->socket_id,
2850                                              &(port->rx_conf[qi]),
2851                                              mp);
2852                                 }
2853                                 if (diag == 0)
2854                                         continue;
2855
2856                                 /* Failed to set up an Rx queue, return */
2857                                 if (rte_atomic16_cmpset(&(port->port_status),
2858                                                         RTE_PORT_HANDLING,
2859                                                         RTE_PORT_STOPPED) == 0)
2860                                         fprintf(stderr,
2861                                                 "Port %d can not be set back to stopped\n",
2862                                                 pi);
2863                                 fprintf(stderr,
2864                                         "Fail to configure port %d rx queues\n",
2865                                         pi);
2866                                 /* try to reconfigure queues next time */
2867                                 port->need_reconfig_queues = 1;
2868                                 return -1;
2869                         }
2870                         /* setup hairpin queues */
2871                         if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2872                                 return -1;
2873                 }
2874                 configure_rxtx_dump_callbacks(verbose_level);
2875                 if (clear_ptypes) {
2876                         diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2877                                         NULL, 0);
2878                         if (diag < 0)
2879                                 fprintf(stderr,
2880                                         "Port %d: Failed to disable Ptype parsing\n",
2881                                         pi);
2882                 }
2883
2884                 p_pi = pi;
2885                 cnt_pi++;
2886
2887                 /* start port */
2888                 diag = eth_dev_start_mp(pi);
2889                 if (diag < 0) {
2890                         fprintf(stderr, "Failed to start port %d: %s\n",
2891                                 pi, rte_strerror(-diag));
2892
2893                         /* Failed to start the port; set it back to stopped */
2894                         if (rte_atomic16_cmpset(&(port->port_status),
2895                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2896                                 fprintf(stderr,
2897                                         "Port %d cannot be set back to stopped\n",
2898                                         pi);
2899                         continue;
2900                 }
2901
2902                 if (rte_atomic16_cmpset(&(port->port_status),
2903                         RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2904                         fprintf(stderr, "Port %d cannot be set to started\n",
2905                                 pi);
2906
2907                 if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2908                         printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2909                                         RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2910
2911                 /* at least one port started; need to check link status */
2912                 need_check_link_status = 1;
2913
2914                 pl[cfg_pi++] = pi;
2915         }
2916
2917         if (need_check_link_status == 1 && !no_link_check)
2918                 check_all_ports_link_status(RTE_PORT_ALL);
2919         else if (need_check_link_status == 0)
2920                 fprintf(stderr, "Please stop the ports first\n");
2921
2922         if (hairpin_mode & 0xf) {
2923                 uint16_t i;
2924                 int j;
2925
2926                 /* bind all started hairpin ports */
2927                 for (i = 0; i < cfg_pi; i++) {
2928                         pi = pl[i];
2929                         /* bind current Tx to all peer Rx */
2930                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2931                                                         RTE_MAX_ETHPORTS, 1);
2932                         if (peer_pi < 0)
2933                                 return peer_pi;
2934                         for (j = 0; j < peer_pi; j++) {
2935                                 if (!port_is_started(peer_pl[j]))
2936                                         continue;
2937                                 diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2938                                 if (diag < 0) {
2939                                         fprintf(stderr,
2940                                                 "Error binding hairpin Tx port %u to %u: %s\n",
2941                                                 pi, peer_pl[j],
2942                                                 rte_strerror(-diag));
2943                                         return -1;
2944                                 }
2945                         }
2946                         /* bind all peer Tx to current Rx */
2947                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2948                                                         RTE_MAX_ETHPORTS, 0);
2949                         if (peer_pi < 0)
2950                                 return peer_pi;
2951                         for (j = 0; j < peer_pi; j++) {
2952                                 if (!port_is_started(peer_pl[j]))
2953                                         continue;
2954                                 diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2955                                 if (diag < 0) {
2956                                         fprintf(stderr,
2957                                                 "Error binding hairpin Tx port %u to %u: %s\n",
2958                                                 peer_pl[j], pi,
2959                                                 rte_strerror(-diag));
2960                                         return -1;
2961                                 }
2962                         }
2963                 }
2964         }
2965
2966         fill_xstats_display_info_for_port(pid);
2967
2968         printf("Done\n");
2969         return 0;
2970 }
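
#if 0	/*
	 * Illustrative sketch, not compiled: start_port() drives a per-port
	 * state machine with rte_atomic16_cmpset(): STOPPED -> HANDLING while
	 * (re)configuring, then HANDLING -> STARTED on success, or back to
	 * STOPPED on failure. The same pattern in isolation; the helper name
	 * is hypothetical.
	 */
static int
port_state_claim_and_start(volatile uint16_t *status)
{
	/* claim the port; fails if it was not in the stopped state */
	if (rte_atomic16_cmpset(status, RTE_PORT_STOPPED,
				RTE_PORT_HANDLING) == 0)
		return -1;
	/* ... configure queues and start the device here ... */
	if (rte_atomic16_cmpset(status, RTE_PORT_HANDLING,
				RTE_PORT_STARTED) == 0)
		return -1;
	return 0;
}
#endif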
2971
2972 void
2973 stop_port(portid_t pid)
2974 {
2975         portid_t pi;
2976         struct rte_port *port;
2977         int need_check_link_status = 0;
2978         portid_t peer_pl[RTE_MAX_ETHPORTS];
2979         int peer_pi;
2980
2981         if (port_id_is_invalid(pid, ENABLED_WARN))
2982                 return;
2983
2984         printf("Stopping ports...\n");
2985
2986         RTE_ETH_FOREACH_DEV(pi) {
2987                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2988                         continue;
2989
2990                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2991                         fprintf(stderr,
2992                                 "Please remove port %d from forwarding configuration.\n",
2993                                 pi);
2994                         continue;
2995                 }
2996
2997                 if (port_is_bonding_slave(pi)) {
2998                         fprintf(stderr,
2999                                 "Please remove port %d from bonded device.\n",
3000                                 pi);
3001                         continue;
3002                 }
3003
3004                 port = &ports[pi];
3005                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
3006                                                 RTE_PORT_HANDLING) == 0)
3007                         continue;
3008
3009                 if (hairpin_mode & 0xf) {
3010                         int j;
3011
3012                         rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3013                         /* unbind all peer Tx from current Rx */
3014                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3015                                                         RTE_MAX_ETHPORTS, 0);
3016                         if (peer_pi < 0)
3017                                 continue;
3018                         for (j = 0; j < peer_pi; j++) {
3019                                 if (!port_is_started(peer_pl[j]))
3020                                         continue;
3021                                 rte_eth_hairpin_unbind(peer_pl[j], pi);
3022                         }
3023                 }
3024
3025                 if (port->flow_list)
3026                         port_flow_flush(pi);
3027
3028                 if (eth_dev_stop_mp(pi) != 0)
3029                         RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3030                                 pi);
3031
3032                 if (rte_atomic16_cmpset(&(port->port_status),
3033                         RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
3034                         fprintf(stderr, "Port %d cannot be set to stopped\n",
3035                                 pi);
3036                 need_check_link_status = 1;
3037         }
3038         if (need_check_link_status && !no_link_check)
3039                 check_all_ports_link_status(RTE_PORT_ALL);
3040
3041         printf("Done\n");
3042 }
3043
3044 static void
3045 remove_invalid_ports_in(portid_t *array, portid_t *total)
3046 {
3047         portid_t i;
3048         portid_t new_total = 0;
3049
3050         for (i = 0; i < *total; i++)
3051                 if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3052                         array[new_total] = array[i];
3053                         new_total++;
3054                 }
3055         *total = new_total;
3056 }
3057
3058 static void
3059 remove_invalid_ports(void)
3060 {
3061         remove_invalid_ports_in(ports_ids, &nb_ports);
3062         remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3063         nb_cfg_ports = nb_fwd_ports;
3064 }
3065
3066 void
3067 close_port(portid_t pid)
3068 {
3069         portid_t pi;
3070         struct rte_port *port;
3071
3072         if (port_id_is_invalid(pid, ENABLED_WARN))
3073                 return;
3074
3075         printf("Closing ports...\n");
3076
3077         RTE_ETH_FOREACH_DEV(pi) {
3078                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3079                         continue;
3080
3081                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3082                         fprintf(stderr,
3083                                 "Please remove port %d from forwarding configuration.\n",
3084                                 pi);
3085                         continue;
3086                 }
3087
3088                 if (port_is_bonding_slave(pi)) {
3089                         fprintf(stderr,
3090                                 "Please remove port %d from bonded device.\n",
3091                                 pi);
3092                         continue;
3093                 }
3094
3095                 port = &ports[pi];
3096                 if (rte_atomic16_cmpset(&(port->port_status),
3097                         RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
3098                         fprintf(stderr, "Port %d is already closed\n", pi);
3099                         continue;
3100                 }
3101
3102                 if (is_proc_primary()) {
3103                         port_flow_flush(pi);
3104                         rte_eth_dev_close(pi);
3105                 }
3106
3107                 free_xstats_display_info(pi);
3108         }
3109
3110         remove_invalid_ports();
3111         printf("Done\n");
3112 }
3113
3114 void
3115 reset_port(portid_t pid)
3116 {
3117         int diag;
3118         portid_t pi;
3119         struct rte_port *port;
3120
3121         if (port_id_is_invalid(pid, ENABLED_WARN))
3122                 return;
3123
3124         if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3125                 (pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3126                 fprintf(stderr,
3127                         "Cannot reset port(s); please stop the port(s) first.\n");
3128                 return;
3129         }
3130
3131         printf("Resetting ports...\n");
3132
3133         RTE_ETH_FOREACH_DEV(pi) {
3134                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3135                         continue;
3136
3137                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3138                         fprintf(stderr,
3139                                 "Please remove port %d from forwarding configuration.\n",
3140                                 pi);
3141                         continue;
3142                 }
3143
3144                 if (port_is_bonding_slave(pi)) {
3145                         fprintf(stderr,
3146                                 "Please remove port %d from bonded device.\n",
3147                                 pi);
3148                         continue;
3149                 }
3150
3151                 diag = rte_eth_dev_reset(pi);
3152                 if (diag == 0) {
3153                         port = &ports[pi];
3154                         port->need_reconfig = 1;
3155                         port->need_reconfig_queues = 1;
3156                 } else {
3157                         fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3158                                 pi, diag);
3159                 }
3160         }
3161
3162         printf("Done\n");
3163 }
3164
3165 void
3166 attach_port(char *identifier)
3167 {
3168         portid_t pi;
3169         struct rte_dev_iterator iterator;
3170
3171         printf("Attaching a new port...\n");
3172
3173         if (identifier == NULL) {
3174                 fprintf(stderr, "No device identifier specified\n");
3175                 return;
3176         }
3177
3178         if (rte_dev_probe(identifier) < 0) {
3179                 TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3180                 return;
3181         }
3182
3183         /* first attach mode: event */
3184         if (setup_on_probe_event) {
3185                 /* new ports are detected on RTE_ETH_EVENT_NEW event */
3186                 for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3187                         if (ports[pi].port_status == RTE_PORT_HANDLING &&
3188                                         ports[pi].need_setup != 0)
3189                                 setup_attached_port(pi);
3190                 return;
3191         }
3192
3193         /* second attach mode: iterator */
3194         RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3195                 /* setup ports matching the devargs used for probing */
3196                 if (port_is_forwarding(pi))
3197                         continue; /* port was already attached before */
3198                 setup_attached_port(pi);
3199         }
3200 }
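
/*
 * The identifier passed to attach_port() is a device argument string as
 * understood by rte_dev_probe(). Illustrative examples (the address and
 * names below are placeholders):
 *
 *	attach_port("0000:03:00.0");             physical (PCI) device
 *	attach_port("net_tap0,iface=test0");     virtual device with devargs
 *
 * With setup_on_probe_event set, the new port is configured from the
 * RTE_ETH_EVENT_NEW handler; otherwise it is located again here through
 * the device iterator.
 */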
3201
3202 static void
3203 setup_attached_port(portid_t pi)
3204 {
3205         unsigned int socket_id;
3206         int ret;
3207
3208         socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3209         /* if socket_id is invalid, set to the first available socket. */
3210         if (check_socket_id(socket_id) < 0)
3211                 socket_id = socket_ids[0];
3212         reconfig(pi, socket_id);
3213         ret = rte_eth_promiscuous_enable(pi);
3214         if (ret != 0)
3215                 fprintf(stderr,
3216                         "Error enabling promiscuous mode for port %u: %s - ignoring\n",
3217                         pi, rte_strerror(-ret));
3218
3219         ports_ids[nb_ports++] = pi;
3220         fwd_ports_ids[nb_fwd_ports++] = pi;
3221         nb_cfg_ports = nb_fwd_ports;
3222         ports[pi].need_setup = 0;
3223         ports[pi].port_status = RTE_PORT_STOPPED;
3224
3225         printf("Port %d is attached. Total ports is now %d\n", pi, nb_ports);
3226         printf("Done\n");
3227 }
3228
3229 static void
3230 detach_device(struct rte_device *dev)
3231 {
3232         portid_t sibling;
3233
3234         if (dev == NULL) {
3235                 fprintf(stderr, "Device already removed\n");
3236                 return;
3237         }
3238
3239         printf("Removing a device...\n");
3240
3241         RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3242                 if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3243                         if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3244                                 fprintf(stderr, "Port %u not stopped\n",
3245                                         sibling);
3246                                 return;
3247                         }
3248                         port_flow_flush(sibling);
3249                 }
3250         }
3251
3252         if (rte_dev_remove(dev) < 0) {
3253                 TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3254                 return;
3255         }
3256         remove_invalid_ports();
3257
3258         printf("Device is detached\n");
3259         printf("Total ports is now %d\n", nb_ports);
3260         printf("Done\n");
3262 }
3263
3264 void
3265 detach_port_device(portid_t port_id)
3266 {
3267         int ret;
3268         struct rte_eth_dev_info dev_info;
3269
3270         if (port_id_is_invalid(port_id, ENABLED_WARN))
3271                 return;
3272
3273         if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3274                 if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3275                         fprintf(stderr, "Port not stopped\n");
3276                         return;
3277                 }
3278                 fprintf(stderr, "Port was not closed\n");
3279         }
3280
3281         ret = eth_dev_info_get_print_err(port_id, &dev_info);
3282         if (ret != 0) {
3283                 TESTPMD_LOG(ERR,
3284                         "Failed to get device info for port %d, not detaching\n",
3285                         port_id);
3286                 return;
3287         }
3288         detach_device(dev_info.device);
3289 }
3290
3291 void
3292 detach_devargs(char *identifier)
3293 {
3294         struct rte_dev_iterator iterator;
3295         struct rte_devargs da;
3296         portid_t port_id;
3297
3298         printf("Removing a device...\n");
3299
3300         memset(&da, 0, sizeof(da));
3301         if (rte_devargs_parsef(&da, "%s", identifier)) {
3302                 fprintf(stderr, "Cannot parse identifier\n");
3303                 return;
3304         }
3305
3306         RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3307                 if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3308                         if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3309                                 fprintf(stderr, "Port %u not stopped\n",
3310                                         port_id);
3311                                 rte_eth_iterator_cleanup(&iterator);
3312                                 rte_devargs_reset(&da);
3313                                 return;
3314                         }
3315                         port_flow_flush(port_id);
3316                 }
3317         }
3318
3319         if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3320                 TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3321                             da.name, da.bus->name);
3322                 rte_devargs_reset(&da);
3323                 return;
3324         }
3325
3326         remove_invalid_ports();
3327
3328         printf("Device %s is detached\n", identifier);
3329         printf("Total ports is now %d\n", nb_ports);
3330         printf("Done\n");
3331         rte_devargs_reset(&da);
3332 }
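
#if 0	/*
	 * Illustrative sketch, not compiled: the rte_devargs lifecycle used
	 * above -- parse the identifier, use the resulting bus/name pair,
	 * then always release with rte_devargs_reset(). The helper name is
	 * hypothetical.
	 */
static int
hotplug_remove_by_identifier(const char *identifier)
{
	struct rte_devargs da;
	int ret;

	memset(&da, 0, sizeof(da));
	ret = rte_devargs_parsef(&da, "%s", identifier);
	if (ret != 0)
		return ret;
	ret = rte_eal_hotplug_remove(da.bus->name, da.name);
	rte_devargs_reset(&da); /* frees memory allocated by the parser */
	return ret;
}
#endif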
3333
3334 void
3335 pmd_test_exit(void)
3336 {
3337         portid_t pt_id;
3338         unsigned int i;
3339         int ret;
3340
3341         if (test_done == 0)
3342                 stop_packet_forwarding();
3343
3344 #ifndef RTE_EXEC_ENV_WINDOWS
3345         for (i = 0; i < RTE_DIM(mempools); i++) {
3346                 if (mempools[i]) {
3347                         if (mp_alloc_type == MP_ALLOC_ANON)
3348                                 rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3349                                                      NULL);
3350                 }
3351         }
3352 #endif
3353         if (ports != NULL) {
3354                 no_link_check = 1;
3355                 RTE_ETH_FOREACH_DEV(pt_id) {
3356                         printf("\nStopping port %d...\n", pt_id);
3357                         fflush(stdout);
3358                         stop_port(pt_id);
3359                 }
3360                 RTE_ETH_FOREACH_DEV(pt_id) {
3361                         printf("\nShutting down port %d...\n", pt_id);
3362                         fflush(stdout);
3363                         close_port(pt_id);
3364                 }
3365         }
3366
3367         if (hot_plug) {
3368                 ret = rte_dev_event_monitor_stop();
3369                 if (ret) {
3370                         RTE_LOG(ERR, EAL,
3371                                 "Failed to stop device event monitor.\n");
3372                         return;
3373                 }
3374
3375                 ret = rte_dev_event_callback_unregister(NULL,
3376                         dev_event_callback, NULL);
3377                 if (ret < 0) {
3378                         RTE_LOG(ERR, EAL,
3379                                 "Failed to unregister device event callback.\n");
3380                         return;
3381                 }
3382
3383                 ret = rte_dev_hotplug_handle_disable();
3384                 if (ret) {
3385                         RTE_LOG(ERR, EAL,
3386                                 "Failed to disable hotplug handling.\n");
3387                         return;
3388                 }
3389         }
3390         for (i = 0; i < RTE_DIM(mempools); i++) {
3391                 if (mempools[i])
3392                         mempool_free_mp(mempools[i]);
3393         }
3394         free(xstats_display);
3395
3396         printf("\nBye...\n");
3397 }
3398
3399 typedef void (*cmd_func_t)(void);
3400 struct pmd_test_command {
3401         const char *cmd_name;
3402         cmd_func_t cmd_func;
3403 };
3404
3405 /* Check the link status of all ports for up to 9 s, then print the final status of each */
3406 static void
3407 check_all_ports_link_status(uint32_t port_mask)
3408 {
3409 #define CHECK_INTERVAL 100 /* 100ms */
3410 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3411         portid_t portid;
3412         uint8_t count, all_ports_up, print_flag = 0;
3413         struct rte_eth_link link;
3414         int ret;
3415         char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3416
3417         printf("Checking link statuses...\n");
3418         fflush(stdout);
3419         for (count = 0; count <= MAX_CHECK_TIME; count++) {
3420                 all_ports_up = 1;
3421                 RTE_ETH_FOREACH_DEV(portid) {
3422                         if ((port_mask & (1 << portid)) == 0)
3423                                 continue;
3424                         memset(&link, 0, sizeof(link));
3425                         ret = rte_eth_link_get_nowait(portid, &link);
3426                         if (ret < 0) {
3427                                 all_ports_up = 0;
3428                                 if (print_flag == 1)
3429                                         fprintf(stderr,
3430                                                 "Port %u link get failed: %s\n",
3431                                                 portid, rte_strerror(-ret));
3432                                 continue;
3433                         }
3434                         /* print link status if flag set */
3435                         if (print_flag == 1) {
3436                                 rte_eth_link_to_str(link_status,
3437                                         sizeof(link_status), &link);
3438                                 printf("Port %d %s\n", portid, link_status);
3439                                 continue;
3440                         }
3441                         /* clear all_ports_up flag if any link down */
3442                         if (link.link_status == ETH_LINK_DOWN) {
3443                                 all_ports_up = 0;
3444                                 break;
3445                         }
3446                 }
3447                 /* after printing the final link statuses, get out */
3448                 if (print_flag == 1)
3449                         break;
3450
3451                 if (all_ports_up == 0) {
3452                         fflush(stdout);
3453                         rte_delay_ms(CHECK_INTERVAL);
3454                 }
3455
3456                 /* set the print_flag if all ports up or timeout */
3457                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3458                         print_flag = 1;
3459                 }
3460
3461                 if (lsc_interrupt)
3462                         break;
3463         }
3464 }
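
#if 0	/*
	 * Illustrative sketch, not compiled: the non-blocking link query and
	 * string formatting used above, reduced to a single port. The helper
	 * name is hypothetical.
	 */
static void
print_one_link_status(portid_t portid)
{
	struct rte_eth_link link;
	char buf[RTE_ETH_LINK_MAX_STR_LEN];

	memset(&link, 0, sizeof(link));
	if (rte_eth_link_get_nowait(portid, &link) < 0)
		return;
	rte_eth_link_to_str(buf, sizeof(buf), &link);
	printf("Port %u %s\n", portid, buf);
}
#endif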
3465
3466 static void
3467 rmv_port_callback(void *arg)
3468 {
3469         int need_to_start = 0;
3470         int org_no_link_check = no_link_check;
3471         portid_t port_id = (intptr_t)arg;
3472         struct rte_eth_dev_info dev_info;
3473         int ret;
3474
3475         RTE_ETH_VALID_PORTID_OR_RET(port_id);
3476
3477         if (!test_done && port_is_forwarding(port_id)) {
3478                 need_to_start = 1;
3479                 stop_packet_forwarding();
3480         }
3481         no_link_check = 1;
3482         stop_port(port_id);
3483         no_link_check = org_no_link_check;
3484
3485         ret = eth_dev_info_get_print_err(port_id, &dev_info);
3486         if (ret != 0)
3487                 TESTPMD_LOG(ERR,
3488                         "Failed to get device info for port %d, not detaching\n",
3489                         port_id);
3490         else {
3491                 struct rte_device *device = dev_info.device;
3492                 close_port(port_id);
3493                 detach_device(device); /* might be already removed or have more ports */
3494         }
3495         if (need_to_start)
3496                 start_packet_forwarding(0);
3497 }
3498
3499 /* This function is used by the interrupt thread */
3500 static int
3501 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3502                   void *ret_param)
3503 {
3504         RTE_SET_USED(param);
3505         RTE_SET_USED(ret_param);
3506
3507         if (type >= RTE_ETH_EVENT_MAX) {
3508                 fprintf(stderr,
3509                         "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3510                         port_id, __func__, type);
3511                 fflush(stderr);
3512         } else if (event_print_mask & (UINT32_C(1) << type)) {
3513                 printf("\nPort %" PRIu16 ": %s event\n", port_id,
3514                         eth_event_desc[type]);
3515                 fflush(stdout);
3516         }
3517
3518         switch (type) {
3519         case RTE_ETH_EVENT_NEW:
3520                 ports[port_id].need_setup = 1;
3521                 ports[port_id].port_status = RTE_PORT_HANDLING;
3522                 break;
3523         case RTE_ETH_EVENT_INTR_RMV:
3524                 if (port_id_is_invalid(port_id, DISABLED_WARN))
3525                         break;
3526                 if (rte_eal_alarm_set(100000,
3527                                 rmv_port_callback, (void *)(intptr_t)port_id))
3528                         fprintf(stderr,
3529                                 "Could not set up deferred device removal\n");
3530                 break;
3531         case RTE_ETH_EVENT_DESTROY:
3532                 ports[port_id].port_status = RTE_PORT_CLOSED;
3533                 printf("Port %u is closed\n", port_id);
3534                 break;
3535         default:
3536                 break;
3537         }
3538         return 0;
3539 }
3540
3541 static int
3542 register_eth_event_callback(void)
3543 {
3544         int ret;
3545         enum rte_eth_event_type event;
3546
3547         for (event = RTE_ETH_EVENT_UNKNOWN;
3548                         event < RTE_ETH_EVENT_MAX; event++) {
3549                 ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3550                                 event,
3551                                 eth_event_callback,
3552                                 NULL);
3553                 if (ret != 0) {
3554                         TESTPMD_LOG(ERR, "Failed to register callback for "
3555                                         "%s event\n", eth_event_desc[event]);
3556                         return -1;
3557                 }
3558         }
3559
3560         return 0;
3561 }
3562
3563 /* This function is used by the interrupt thread */
3564 static void
3565 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3566                              __rte_unused void *arg)
3567 {
3568         uint16_t port_id;
3569         int ret;
3570
3571         if (type >= RTE_DEV_EVENT_MAX) {
3572                 fprintf(stderr, "%s called upon invalid event %d\n",
3573                         __func__, type);
3574                 fflush(stderr);
3575         }
3576
3577         switch (type) {
3578         case RTE_DEV_EVENT_REMOVE:
3579                 RTE_LOG(DEBUG, EAL, "The device %s has been removed!\n",
3580                         device_name);
3581                 ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3582                 if (ret) {
3583                         RTE_LOG(ERR, EAL, "Cannot get port by device name %s!\n",
3584                                 device_name);
3585                         return;
3586                 }
3587                 /*
3588                  * Because the user's callback is invoked from the EAL interrupt
3589                  * callback, the interrupt callback must return before it can be
3590                  * unregistered when detaching the device. So return from this
3591                  * callback quickly and detach the device via a deferred removal.
3592                  * This is a workaround; once device detaching is moved into the
3593                  * EAL, the deferred removal can be deleted.
3595                  */
3596                 if (rte_eal_alarm_set(100000,
3597                                 rmv_port_callback, (void *)(intptr_t)port_id))
3598                         RTE_LOG(ERR, EAL,
3599                                 "Could not set up deferred device removal\n");
3600                 break;
3601         case RTE_DEV_EVENT_ADD:
3602                 RTE_LOG(ERR, EAL, "The device %s has been added!\n",
3603                         device_name);
3604                 /* TODO: After the kernel driver binding finishes,
3605                  * begin to attach the port.
3606                  */
3607                 break;
3608         default:
3609                 break;
3610         }
3611 }
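
#if 0	/*
	 * Illustrative sketch, not compiled: the deferred-work pattern used
	 * above. rte_eal_alarm_set() runs the callback on the EAL interrupt
	 * thread after the given delay in microseconds, so the current
	 * interrupt callback can return before the device is detached. The
	 * helper name is hypothetical.
	 */
static void
schedule_deferred_removal(portid_t port_id)
{
	/* 100 ms delay, the same value used by the callbacks above */
	if (rte_eal_alarm_set(100000, rmv_port_callback,
			(void *)(intptr_t)port_id) != 0)
		fprintf(stderr, "Could not set up deferred device removal\n");
}
#endif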
3612
3613 static void
3614 rxtx_port_config(struct rte_port *port)
3615 {
3616         uint16_t qid;
3617         uint64_t offloads;
3618
3619         for (qid = 0; qid < nb_rxq; qid++) {
3620                 offloads = port->rx_conf[qid].offloads;
3621                 port->rx_conf[qid] = port->dev_info.default_rxconf;
3622                 if (offloads != 0)
3623                         port->rx_conf[qid].offloads = offloads;
3624
3625                 /* Check if any Rx parameters have been passed */
3626                 if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3627                         port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3628
3629                 if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3630                         port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3631
3632                 if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3633                         port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3634
3635                 if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3636                         port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3637
3638                 if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3639                         port->rx_conf[qid].rx_drop_en = rx_drop_en;
3640
3641                 port->nb_rx_desc[qid] = nb_rxd;
3642         }
3643
3644         for (qid = 0; qid < nb_txq; qid++) {
3645                 offloads = port->tx_conf[qid].offloads;
3646                 port->tx_conf[qid] = port->dev_info.default_txconf;
3647                 if (offloads != 0)
3648                         port->tx_conf[qid].offloads = offloads;
3649
3650                 /* Check if any Tx parameters have been passed */
3651                 if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3652                         port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3653
3654                 if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3655                         port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3656
3657                 if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3658                         port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3659
3660                 if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3661                         port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3662
3663                 if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3664                         port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3665
3666                 port->nb_tx_desc[qid] = nb_txd;
3667         }
3668 }
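
/*
 * The RTE_PMD_PARAM_UNSET checks above mean a threshold is only overridden
 * when the corresponding testpmd command-line option was given (e.g.
 * --rxpt/--rxht/--rxwt, --rxfreet, --txpt/--txht/--txwt, --txrst,
 * --txfreet); otherwise the PMD's default_rxconf/default_txconf values
 * from dev_info are kept.
 */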
3669
3670 /*
3671  * Helper function to reconcile the max_rx_pkt_len value with the JUMBO_FRAME
3672  * offload; the MTU is also updated if the JUMBO_FRAME offload is not set.
3673  *
3674  * port->dev_info should be set before calling this function.
3675  *
3676  * return 0 on success, negative on error
3677  */
3678 int
3679 update_jumbo_frame_offload(portid_t portid)
3680 {
3681         struct rte_port *port = &ports[portid];
3682         uint32_t eth_overhead;
3683         uint64_t rx_offloads;
3684         int ret;
3685         bool on;
3686
3687         /* Compute the L2 overhead used to convert between MTU and frame length */
3688         if (port->dev_info.max_mtu != UINT16_MAX &&
3689             port->dev_info.max_rx_pktlen > port->dev_info.max_mtu)
3690                 eth_overhead = port->dev_info.max_rx_pktlen -
3691                                 port->dev_info.max_mtu;
3692         else
3693                 eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
3694
3695         rx_offloads = port->dev_conf.rxmode.offloads;
3696
3697         /* The default config value is 0: use RTE_ETHER_MTU plus the PMD-specific overhead */
3698         if (port->dev_conf.rxmode.max_rx_pkt_len == 0)
3699                 port->dev_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MTU + eth_overhead;
3700
3701         if (port->dev_conf.rxmode.max_rx_pkt_len <= RTE_ETHER_MTU + eth_overhead) {
3702                 rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3703                 on = false;
3704         } else {
3705                 if ((port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3706                         fprintf(stderr,
3707                                 "Frame size (%u) is not supported by port %u\n",
3708                                 port->dev_conf.rxmode.max_rx_pkt_len,
3709                                 portid);
3710                         return -1;
3711                 }
3712                 rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3713                 on = true;
3714         }
3715
3716         if (rx_offloads != port->dev_conf.rxmode.offloads) {
3717                 uint16_t qid;
3718
3719                 port->dev_conf.rxmode.offloads = rx_offloads;
3720
3721                 /* Apply JUMBO_FRAME offload configuration to Rx queue(s) */
3722                 for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) {
3723                         if (on)
3724                                 port->rx_conf[qid].offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3725                         else
3726                                 port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3727                 }
3728         }
3729
3730         /* If JUMBO_FRAME is set, the MTU conversion is done by the ethdev
3731          * layer; if unset, do it here
3732          */
3733         if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3734                 ret = eth_dev_set_mtu_mp(portid,
3735                                 port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead);
3736                 if (ret)
3737                         fprintf(stderr,
3738                                 "Failed to set MTU to %u for port %u\n",
3739                                 port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead,
3740                                 portid);
3741         }
3742
3743         return 0;
3744 }
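
/*
 * Worked example: with the default Ethernet overhead of RTE_ETHER_HDR_LEN +
 * RTE_ETHER_CRC_LEN = 14 + 4 = 18 bytes, an MTU of RTE_ETHER_MTU (1500)
 * corresponds to max_rx_pkt_len = 1500 + 18 = 1518. Any max_rx_pkt_len above
 * 1518 therefore requires the JUMBO_FRAME offload; when the offload ends up
 * unset, the MTU is derived here as max_rx_pkt_len - 18 (1518 -> 1500).
 */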
3745
3746 void
3747 init_port_config(void)
3748 {
3749         portid_t pid;
3750         struct rte_port *port;
3751         int ret, i;
3752
3753         RTE_ETH_FOREACH_DEV(pid) {
3754                 port = &ports[pid];
3755                 port->dev_conf.fdir_conf = fdir_conf;
3756
3757                 ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3758                 if (ret != 0)
3759                         return;
3760
3761                 if (nb_rxq > 1) {
3762                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3763                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3764                                 rss_hf & port->dev_info.flow_type_rss_offloads;
3765                 } else {
3766                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3767                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3768                 }
3769
3770                 if (port->dcb_flag == 0) {
3771                         if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3772                                 port->dev_conf.rxmode.mq_mode =
3773                                         (enum rte_eth_rx_mq_mode)
3774                                                 (rx_mq_mode & ETH_MQ_RX_RSS);
3775                         } else {
3776                                 port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3777                                 port->dev_conf.rxmode.offloads &=
3778                                                 ~DEV_RX_OFFLOAD_RSS_HASH;
3779
3780                                 for (i = 0;
3781                                      i < port->dev_info.nb_rx_queues;
3782                                      i++)
3783                                         port->rx_conf[i].offloads &=
3784                                                 ~DEV_RX_OFFLOAD_RSS_HASH;
3785                         }
3786                 }
3787
3788                 rxtx_port_config(port);
3789
3790                 ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3791                 if (ret != 0)
3792                         return;
3793
3794 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3795                 rte_pmd_ixgbe_bypass_init(pid);
3796 #endif
3797
3798                 if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3799                         port->dev_conf.intr_conf.lsc = 1;
3800                 if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3801                         port->dev_conf.intr_conf.rmv = 1;
3802         }
3803 }
3804
3805 void set_port_slave_flag(portid_t slave_pid)
3806 {
3807         struct rte_port *port;
3808
3809         port = &ports[slave_pid];
3810         port->slave_flag = 1;
3811 }
3812
3813 void clear_port_slave_flag(portid_t slave_pid)
3814 {
3815         struct rte_port *port;
3816
3817         port = &ports[slave_pid];
3818         port->slave_flag = 0;
3819 }
3820
3821 uint8_t port_is_bonding_slave(portid_t slave_pid)
3822 {
3823         struct rte_port *port;
3824         struct rte_eth_dev_info dev_info;
3825         int ret;
3826
3827         port = &ports[slave_pid];
3828         ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3829         if (ret != 0) {
3830                 TESTPMD_LOG(ERR,
3831                         "Failed to get device info for port id %d, "
3832                         "cannot determine if the port is a bonded slave\n",
3833                         slave_pid);
3834                 return 0;
3835         }
3836         if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3837                 return 1;
3838         return 0;
3839 }
3840
3841 const uint16_t vlan_tags[] = {
3842                 0,  1,  2,  3,  4,  5,  6,  7,
3843                 8,  9, 10, 11,  12, 13, 14, 15,
3844                 16, 17, 18, 19, 20, 21, 22, 23,
3845                 24, 25, 26, 27, 28, 29, 30, 31
3846 };
3847
3848 static int
3849 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3850                  enum dcb_mode_enable dcb_mode,
3851                  enum rte_eth_nb_tcs num_tcs,
3852                  uint8_t pfc_en)
3853 {
3854         uint8_t i;
3855         int32_t rc;
3856         struct rte_eth_rss_conf rss_conf;
3857
3858         /*
3859          * Build up the correct configuration for DCB+VT based on the VLAN tags array
3860          * given above, and the number of traffic classes available for use.
3861          */
3862         if (dcb_mode == DCB_VT_ENABLED) {
3863                 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3864                                 &eth_conf->rx_adv_conf.vmdq_dcb_conf;
3865                 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3866                                 &eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3867
3868                 /* VMDQ+DCB RX and TX configurations */
3869                 vmdq_rx_conf->enable_default_pool = 0;
3870                 vmdq_rx_conf->default_pool = 0;
3871                 vmdq_rx_conf->nb_queue_pools =
3872                         (num_tcs == ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3873                 vmdq_tx_conf->nb_queue_pools =
3874                         (num_tcs == ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3875
3876                 vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3877                 for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3878                         vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3879                         vmdq_rx_conf->pool_map[i].pools =
3880                                 1 << (i % vmdq_rx_conf->nb_queue_pools);
3881                 }
3882                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3883                         vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3884                         vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3885                 }
3886
3887                 /* set DCB mode of RX and TX of multiple queues */
3888                 eth_conf->rxmode.mq_mode =
3889                                 (enum rte_eth_rx_mq_mode)
3890                                         (rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3891                 eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3892         } else {
3893                 struct rte_eth_dcb_rx_conf *rx_conf =
3894                                 &eth_conf->rx_adv_conf.dcb_rx_conf;
3895                 struct rte_eth_dcb_tx_conf *tx_conf =
3896                                 &eth_conf->tx_adv_conf.dcb_tx_conf;
3897
3898                 memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3899
3900                 rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3901                 if (rc != 0)
3902                         return rc;
3903
3904                 rx_conf->nb_tcs = num_tcs;
3905                 tx_conf->nb_tcs = num_tcs;
3906
3907                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3908                         rx_conf->dcb_tc[i] = i % num_tcs;
3909                         tx_conf->dcb_tc[i] = i % num_tcs;
3910                 }
3911
3912                 eth_conf->rxmode.mq_mode =
3913                                 (enum rte_eth_rx_mq_mode)
3914                                         (rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3915                 eth_conf->rx_adv_conf.rss_conf = rss_conf;
3916                 eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3917         }
3918
3919         if (pfc_en)
3920                 eth_conf->dcb_capability_en =
3921                                 ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3922         else
3923                 eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3924
3925         return 0;
3926 }
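
/*
 * Worked example for the DCB+VT branch above: with num_tcs == ETH_4_TCS,
 * nb_queue_pools is ETH_32_POOLS, so each of the 32 vlan_tags[] entries maps
 * to its own pool (pool_map[i].pools == 1 << (i % 32)) and the 8 user
 * priorities are spread over the 4 traffic classes (dcb_tc[i] = i % 4). With
 * ETH_8_TCS, nb_pool_maps is 16, so only the first 16 vlan_tags[] entries
 * are mapped, again one per pool.
 */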
3927
3928 int
3929 init_port_dcb_config(portid_t pid,
3930                      enum dcb_mode_enable dcb_mode,
3931                      enum rte_eth_nb_tcs num_tcs,
3932                      uint8_t pfc_en)
3933 {
3934         struct rte_eth_conf port_conf;
3935         struct rte_port *rte_port;
3936         int retval;
3937         uint16_t i;
3938
3939         if (num_procs > 1) {
3940                 printf("The multi-process feature doesn't support DCB.\n");
3941                 return -ENOTSUP;
3942         }
3943         rte_port = &ports[pid];
3944
3945         /* retain the original device configuration. */
3946         memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3947
3948         /* set configuration of DCB in VT mode and DCB in non-VT mode */
3949         retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3950         if (retval < 0)
3951                 return retval;
3952         port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3953
3954         /* re-configure the device. */
3955         retval = rte_eth_dev_configure(pid, nb_rxq, nb_txq, &port_conf);
3956         if (retval < 0)
3957                 return retval;
3958
3959         retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3960         if (retval != 0)
3961                 return retval;
3962
3963         /* If dev_info.vmdq_pool_base is greater than 0,
3964          * the queue IDs of the VMDq pools start after the PF queues.
3965          */
3966         if (dcb_mode == DCB_VT_ENABLED &&
3967             rte_port->dev_info.vmdq_pool_base > 0) {
3968                 fprintf(stderr,
3969                         "VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3970                         pid);
3971                 return -1;
3972         }
3973
3974         /* Assume the ports in testpmd have the same DCB capability
3975          * and the same number of Rx and Tx queues in DCB mode
3976          */
3977         if (dcb_mode == DCB_VT_ENABLED) {
3978                 if (rte_port->dev_info.max_vfs > 0) {
3979                         nb_rxq = rte_port->dev_info.nb_rx_queues;
3980                         nb_txq = rte_port->dev_info.nb_tx_queues;
3981                 } else {
3982                         nb_rxq = rte_port->dev_info.max_rx_queues;
3983                         nb_txq = rte_port->dev_info.max_tx_queues;
3984                 }
3985         } else {
3986                 /* if VT is disabled, use all PF queues */
3987                 if (rte_port->dev_info.vmdq_pool_base == 0) {
3988                         nb_rxq = rte_port->dev_info.max_rx_queues;
3989                         nb_txq = rte_port->dev_info.max_tx_queues;
3990                 } else {
3991                         nb_rxq = (queueid_t)num_tcs;
3992                         nb_txq = (queueid_t)num_tcs;
3994                 }
3995         }
3996         rx_free_thresh = 64;
3997
3998         memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3999
4000         rxtx_port_config(rte_port);
4001         /* VLAN filter */
4002         rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
4003         for (i = 0; i < RTE_DIM(vlan_tags); i++)
4004                 rx_vft_set(pid, vlan_tags[i], 1);
4005
4006         retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4007         if (retval != 0)
4008                 return retval;
4009
4010         rte_port->dcb_flag = 1;
4011
4012         /* Enter DCB configuration status */
4013         dcb_config = 1;
4014
4015         return 0;
4016 }
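
/*
 * init_port_dcb_config() backs the interactive command
 * "port config <port_id> dcb vt (on|off) <num_tcs> pfc (on|off)". The port
 * must be stopped first, and nb_rxq/nb_txq are re-derived from the DCB
 * configuration as shown above.
 */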
4017
4018 static void
4019 init_port(void)
4020 {
4021         int i;
4022
4023         /* Configuration of Ethernet ports. */
4024         ports = rte_zmalloc("testpmd: ports",
4025                             sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4026                             RTE_CACHE_LINE_SIZE);
4027         if (ports == NULL) {
4028                 rte_exit(EXIT_FAILURE,
4029                                 "rte_zmalloc(%d struct rte_port) failed\n",
4030                                 RTE_MAX_ETHPORTS);
4031         }
4032         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4033                 ports[i].xstats_info.allocated = false;
4034         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4035                 LIST_INIT(&ports[i].flow_tunnel_list);
4036         /* Initialize ports NUMA structures */
4037         memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4038         memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4039         memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4040 }
4041
4042 static void
4043 force_quit(void)
4044 {
4045         pmd_test_exit();
4046         prompt_exit();
4047 }
4048
4049 static void
4050 print_stats(void)
4051 {
4052         uint8_t i;
4053         const char clr[] = { 27, '[', '2', 'J', '\0' }; /* ANSI "ESC[2J": clear screen */
4054         const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' }; /* ANSI "ESC[1;1H": cursor to top-left */
4055
4056         /* Clear screen and move to top left */
4057         printf("%s%s", clr, top_left);
4058
4059         printf("\nPort statistics ====================================");
4060         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4061                 nic_stats_display(fwd_ports_ids[i]);
4062
4063         fflush(stdout);
4064 }
4065
4066 static void
4067 signal_handler(int signum)
4068 {
4069         if (signum == SIGINT || signum == SIGTERM) {
4070                 fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4071                         signum);
4072 #ifdef RTE_LIB_PDUMP
4073                 /* uninitialize packet capture framework */
4074                 rte_pdump_uninit();
4075 #endif
4076 #ifdef RTE_LIB_LATENCYSTATS
4077                 if (latencystats_enabled != 0)
4078                         rte_latencystats_uninit();
4079 #endif
4080                 force_quit();
4081                 /* Set flag to indicate forced termination. */
4082                 f_quit = 1;
4083                 /* exit with the expected status */
4084 #ifndef RTE_EXEC_ENV_WINDOWS
4085                 signal(signum, SIG_DFL);
4086                 kill(getpid(), signum);
4087 #endif
4088         }
4089 }
4090
4091 int
4092 main(int argc, char** argv)
4093 {
4094         int diag;
4095         portid_t port_id;
4096         uint16_t count;
4097         int ret;
4098
4099         signal(SIGINT, signal_handler);
4100         signal(SIGTERM, signal_handler);
4101
4102         testpmd_logtype = rte_log_register("testpmd");
4103         if (testpmd_logtype < 0)
4104                 rte_exit(EXIT_FAILURE, "Cannot register log type\n");
4105         rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4106
4107         diag = rte_eal_init(argc, argv);
4108         if (diag < 0)
4109                 rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4110                          rte_strerror(rte_errno));
4111
4112         ret = register_eth_event_callback();
4113         if (ret != 0)
4114                 rte_exit(EXIT_FAILURE, "Cannot register for ethdev events\n");
4115
4116 #ifdef RTE_LIB_PDUMP
4117         /* initialize packet capture framework */
4118         rte_pdump_init();
4119 #endif
4120
4121         count = 0;
4122         RTE_ETH_FOREACH_DEV(port_id) {
4123                 ports_ids[count] = port_id;
4124                 count++;
4125         }
4126         nb_ports = (portid_t) count;
4127         if (nb_ports == 0)
4128                 TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4129
4130         /* allocate port structures, and init them */
4131         init_port();
4132
4133         set_def_fwd_config();
4134         if (nb_lcores == 0)
4135                 rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4136                          "Check the core mask argument\n");
4137
4138         /* Bitrate/latency stats disabled by default */
4139 #ifdef RTE_LIB_BITRATESTATS
4140         bitrate_enabled = 0;
4141 #endif
4142 #ifdef RTE_LIB_LATENCYSTATS
4143         latencystats_enabled = 0;
4144 #endif
4145
4146         /* on FreeBSD, mlockall() is disabled by default */
4147 #ifdef RTE_EXEC_ENV_FREEBSD
4148         do_mlockall = 0;
4149 #else
4150         do_mlockall = 1;
4151 #endif
4152
4153         argc -= diag;
4154         argv += diag;
4155         if (argc > 1)
4156                 launch_args_parse(argc, argv);
4157
4158 #ifndef RTE_EXEC_ENV_WINDOWS
4159         if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4160                 TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4161                         strerror(errno));
4162         }
4163 #endif
4164
4165         if (tx_first && interactive)
4166                 rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4167                                 "interactive mode.\n");
4168
4169         if (tx_first && lsc_interrupt) {
4170                 fprintf(stderr,
4171                         "Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4172                 lsc_interrupt = 0;
4173         }
4174
4175         if (!nb_rxq && !nb_txq)
4176                 fprintf(stderr,
4177                         "Warning: Either rx or tx queues should be non-zero\n");
4178
4179         if (nb_rxq > 1 && nb_rxq > nb_txq)
4180                 fprintf(stderr,
4181                         "Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent fully testing it.\n",
4182                         nb_rxq, nb_txq);
4183
4184         init_config();
4185
4186         if (hot_plug) {
4187                 ret = rte_dev_hotplug_handle_enable();
4188                 if (ret) {
4189                         RTE_LOG(ERR, EAL,
4190                                 "Failed to enable hotplug handling.\n");
4191                         return -1;
4192                 }
4193
4194                 ret = rte_dev_event_monitor_start();
4195                 if (ret) {
4196                         RTE_LOG(ERR, EAL,
4197                                 "Failed to start device event monitoring.\n");
4198                         return -1;
4199                 }
4200
4201                 ret = rte_dev_event_callback_register(NULL,
4202                         dev_event_callback, NULL);
4203                 if (ret) {
4204                         RTE_LOG(ERR, EAL,
4205                                 "Failed to register device event callback\n");
4206                         return -1;
4207                 }
4208         }
4209
4210         if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4211                 rte_exit(EXIT_FAILURE, "Failed to start ports\n");
4212
4213         /* set all ports to promiscuous mode by default */
4214         RTE_ETH_FOREACH_DEV(port_id) {
4215                 ret = rte_eth_promiscuous_enable(port_id);
4216                 if (ret != 0)
4217                         fprintf(stderr,
4218                                 "Error enabling promiscuous mode for port %u: %s - ignoring\n",
4219                                 port_id, rte_strerror(-ret));
4220         }
4221
4222         /* Init metrics library */
4223         rte_metrics_init(rte_socket_id());
4224
4225 #ifdef RTE_LIB_LATENCYSTATS
4226         if (latencystats_enabled != 0) {
4227                 int ret = rte_latencystats_init(1, NULL);
4228                 if (ret)
4229                         fprintf(stderr,
4230                                 "Warning: latencystats init() returned error %d\n",
4231                                 ret);
4232                 fprintf(stderr, "Latencystats running on lcore %d\n",
4233                         latencystats_lcore_id);
4234         }
4235 #endif
4236
4237         /* Setup bitrate stats */
4238 #ifdef RTE_LIB_BITRATESTATS
4239         if (bitrate_enabled != 0) {
4240                 bitrate_data = rte_stats_bitrate_create();
4241                 if (bitrate_data == NULL)
4242                         rte_exit(EXIT_FAILURE,
4243                                 "Could not allocate bitrate data.\n");
4244                 rte_stats_bitrate_reg(bitrate_data);
4245         }
4246 #endif
4247
4248 #ifdef RTE_LIB_CMDLINE
4249         if (strlen(cmdline_filename) != 0)
4250                 cmdline_read_from_file(cmdline_filename);
4251
4252         if (interactive == 1) {
4253                 if (auto_start) {
4254                         printf("Start automatic packet forwarding\n");
4255                         start_packet_forwarding(0);
4256                 }
4257                 prompt();
4258                 pmd_test_exit();
4259         } else
4260 #endif
4261         {
4262                 char c;
4263                 int rc;
4264
4265                 f_quit = 0;
4266
4267                 printf("No interactive command line given, starting packet forwarding\n");
4268                 start_packet_forwarding(tx_first);
4269                 if (stats_period != 0) {
4270                         uint64_t prev_time = 0, cur_time, diff_time = 0;
4271                         uint64_t timer_period;
4272
4273                         /* Convert to number of cycles */
4274                         timer_period = stats_period * rte_get_timer_hz();
4275
4276                         while (f_quit == 0) {
4277                                 cur_time = rte_get_timer_cycles();
4278                                 diff_time += cur_time - prev_time;
4279
4280                                 if (diff_time >= timer_period) {
4281                                         print_stats();
4282                                         /* Reset the timer */
4283                                         diff_time = 0;
4284                                 }
4285                                 /* Sleep to avoid unnecessary checks */
4286                                 prev_time = cur_time;
4287                                 rte_delay_us_sleep(US_PER_S);
4288                         }
4289                 }
4290
4291                 printf("Press enter to exit\n");
4292                 rc = read(0, &c, 1);
4293                 pmd_test_exit();
4294                 if (rc < 0)
4295                         return 1;
4296         }
4297
4298         ret = rte_eal_cleanup();
4299         if (ret != 0)
4300                 rte_exit(EXIT_FAILURE,
4301                          "EAL cleanup failed: %s\n", strerror(-ret));
4302
4303         return EXIT_SUCCESS;
4304 }
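
/*
 * Illustrative invocation (core, channel and queue counts are placeholders):
 *
 *	./dpdk-testpmd -l 0-3 -n 4 -- -i --rxq=2 --txq=2
 *
 * EAL arguments come before the "--" separator and are consumed by
 * rte_eal_init(); testpmd's own options, parsed by launch_args_parse(),
 * follow it. With -i testpmd enters the interactive prompt; otherwise it
 * starts forwarding immediately, as in the non-interactive branch above.
 */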