app/testpmd: add forwarding engine for shared Rx queue
[dpdk.git] / app / test-pmd / testpmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_atomic.h>
40 #include <rte_branch_prediction.h>
41 #include <rte_mempool.h>
42 #include <rte_malloc.h>
43 #include <rte_mbuf.h>
44 #include <rte_mbuf_pool_ops.h>
45 #include <rte_interrupts.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_dev.h>
50 #include <rte_string_fns.h>
51 #ifdef RTE_NET_IXGBE
52 #include <rte_pmd_ixgbe.h>
53 #endif
54 #ifdef RTE_LIB_PDUMP
55 #include <rte_pdump.h>
56 #endif
57 #include <rte_flow.h>
58 #include <rte_metrics.h>
59 #ifdef RTE_LIB_BITRATESTATS
60 #include <rte_bitrate.h>
61 #endif
62 #ifdef RTE_LIB_LATENCYSTATS
63 #include <rte_latencystats.h>
64 #endif
65 #ifdef RTE_EXEC_ENV_WINDOWS
66 #include <process.h>
67 #endif
68
69 #include "testpmd.h"
70
71 #ifndef MAP_HUGETLB
72 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
73 #define HUGE_FLAG (0x40000)
74 #else
75 #define HUGE_FLAG MAP_HUGETLB
76 #endif
77
78 #ifndef MAP_HUGE_SHIFT
79 /* older kernels (or FreeBSD) will not have this define */
80 #define HUGE_SHIFT (26)
81 #else
82 #define HUGE_SHIFT MAP_HUGE_SHIFT
83 #endif
84
85 #define EXTMEM_HEAP_NAME "extmem"
86 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
87
88 uint16_t verbose_level = 0; /**< Silent by default. */
89 int testpmd_logtype; /**< Log type for testpmd logs */
90
91 /* use main core for command line ? */
92 uint8_t interactive = 0;
93 uint8_t auto_start = 0;
94 uint8_t tx_first;
95 char cmdline_filename[PATH_MAX] = {0};
96
97 /*
98  * NUMA support configuration.
99  * When set, the NUMA support attempts to dispatch the allocation of the
100  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
101  * probed ports among the CPU sockets 0 and 1.
102  * Otherwise, all memory is allocated from CPU socket 0.
103  */
104 uint8_t numa_support = 1; /**< numa enabled by default */
105
106 /*
107  * In UMA mode,all memory is allocated from socket 0 if --socket-num is
108  * not configured.
109  */
110 uint8_t socket_num = UMA_NO_CONFIG;
111
112 /*
113  * Select mempool allocation type:
114  * - native: use regular DPDK memory
115  * - anon: use regular DPDK memory to create mempool, but populate using
116  *         anonymous memory (may not be IOVA-contiguous)
117  * - xmem: use externally allocated hugepage memory
118  */
119 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
120
121 /*
122  * Store specified sockets on which memory pool to be used by ports
123  * is allocated.
124  */
125 uint8_t port_numa[RTE_MAX_ETHPORTS];
126
127 /*
128  * Store specified sockets on which RX ring to be used by ports
129  * is allocated.
130  */
131 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
132
133 /*
134  * Store specified sockets on which TX ring to be used by ports
135  * is allocated.
136  */
137 uint8_t txring_numa[RTE_MAX_ETHPORTS];
138
139 /*
140  * Record the Ethernet address of peer target ports to which packets are
141  * forwarded.
142  * Must be instantiated with the ethernet addresses of peer traffic generator
143  * ports.
144  */
145 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
146 portid_t nb_peer_eth_addrs = 0;
147
148 /*
149  * Probed Target Environment.
150  */
151 struct rte_port *ports;        /**< For all probed ethernet ports. */
152 portid_t nb_ports;             /**< Number of probed ethernet ports. */
153 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
154 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
155
156 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
157
158 /*
159  * Test Forwarding Configuration.
160  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
161  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
162  */
163 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
164 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
165 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
166 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
167
168 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
169 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
170
171 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
172 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
173
174 /*
175  * Forwarding engines.
176  */
177 struct fwd_engine * fwd_engines[] = {
178         &io_fwd_engine,
179         &mac_fwd_engine,
180         &mac_swap_engine,
181         &flow_gen_engine,
182         &rx_only_engine,
183         &tx_only_engine,
184         &csum_fwd_engine,
185         &icmp_echo_engine,
186         &noisy_vnf_engine,
187         &five_tuple_swap_fwd_engine,
188 #ifdef RTE_LIBRTE_IEEE1588
189         &ieee1588_fwd_engine,
190 #endif
191         &shared_rxq_engine,
192         NULL,
193 };
194
195 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
196 uint16_t mempool_flags;
197
198 struct fwd_config cur_fwd_config;
199 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
200 uint32_t retry_enabled;
201 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
202 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
203
204 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
205 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
206         DEFAULT_MBUF_DATA_SIZE
207 }; /**< Mbuf data space size. */
208 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
209                                       * specified on command-line. */
210 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
211
212 /** Extended statistics to show. */
213 struct rte_eth_xstat_name *xstats_display;
214
215 unsigned int xstats_display_num; /**< Size of extended statistics to show */
216
217 /*
218  * In container, it cannot terminate the process which running with 'stats-period'
219  * option. Set flag to exit stats period loop after received SIGINT/SIGTERM.
220  */
221 uint8_t f_quit;
222
223 /*
224  * Max Rx frame size, set by '--max-pkt-len' parameter.
225  */
226 uint32_t max_rx_pkt_len;
227
228 /*
229  * Configuration of packet segments used to scatter received packets
230  * if some of split features is configured.
231  */
232 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
233 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
234 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
235 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
236
237 /*
238  * Configuration of packet segments used by the "txonly" processing engine.
239  */
240 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
241 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
242         TXONLY_DEF_PACKET_LEN,
243 };
244 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
245
246 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
247 /**< Split policy for packets to TX. */
248
249 uint8_t txonly_multi_flow;
250 /**< Whether multiple flows are generated in TXONLY mode. */
251
252 uint32_t tx_pkt_times_inter;
253 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
254
255 uint32_t tx_pkt_times_intra;
256 /**< Timings for send scheduling in TXONLY mode, time between packets. */
257
258 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
259 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
260 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
261 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
262
263 /* current configuration is in DCB or not,0 means it is not in DCB mode */
264 uint8_t dcb_config = 0;
265
266 /*
267  * Configurable number of RX/TX queues.
268  */
269 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
270 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
271 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
272
273 /*
274  * Configurable number of RX/TX ring descriptors.
275  * Defaults are supplied by drivers via ethdev.
276  */
277 #define RTE_TEST_RX_DESC_DEFAULT 0
278 #define RTE_TEST_TX_DESC_DEFAULT 0
279 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
280 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
281
282 #define RTE_PMD_PARAM_UNSET -1
283 /*
284  * Configurable values of RX and TX ring threshold registers.
285  */
286
287 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
288 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
289 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
290
291 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
292 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
293 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
294
295 /*
296  * Configurable value of RX free threshold.
297  */
298 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
299
300 /*
301  * Configurable value of RX drop enable.
302  */
303 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
304
305 /*
306  * Configurable value of TX free threshold.
307  */
308 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
309
310 /*
311  * Configurable value of TX RS bit threshold.
312  */
313 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
314
315 /*
316  * Configurable value of buffered packets before sending.
317  */
318 uint16_t noisy_tx_sw_bufsz;
319
320 /*
321  * Configurable value of packet buffer timeout.
322  */
323 uint16_t noisy_tx_sw_buf_flush_time;
324
325 /*
326  * Configurable value for size of VNF internal memory area
327  * used for simulating noisy neighbour behaviour
328  */
329 uint64_t noisy_lkup_mem_sz;
330
331 /*
332  * Configurable value of number of random writes done in
333  * VNF simulation memory area.
334  */
335 uint64_t noisy_lkup_num_writes;
336
337 /*
338  * Configurable value of number of random reads done in
339  * VNF simulation memory area.
340  */
341 uint64_t noisy_lkup_num_reads;
342
343 /*
344  * Configurable value of number of random reads/writes done in
345  * VNF simulation memory area.
346  */
347 uint64_t noisy_lkup_num_reads_writes;
348
349 /*
350  * Receive Side Scaling (RSS) configuration.
351  */
352 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
353
354 /*
355  * Port topology configuration
356  */
357 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
358
359 /*
360  * Avoids to flush all the RX streams before starts forwarding.
361  */
362 uint8_t no_flush_rx = 0; /* flush by default */
363
364 /*
365  * Flow API isolated mode.
366  */
367 uint8_t flow_isolate_all;
368
369 /*
370  * Avoids to check link status when starting/stopping a port.
371  */
372 uint8_t no_link_check = 0; /* check by default */
373
374 /*
375  * Don't automatically start all ports in interactive mode.
376  */
377 uint8_t no_device_start = 0;
378
379 /*
380  * Enable link status change notification
381  */
382 uint8_t lsc_interrupt = 1; /* enabled by default */
383
384 /*
385  * Enable device removal notification.
386  */
387 uint8_t rmv_interrupt = 1; /* enabled by default */
388
389 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
390
391 /* After attach, port setup is called on event or by iterator */
392 bool setup_on_probe_event = true;
393
394 /* Clear ptypes on port initialization. */
395 uint8_t clear_ptypes = true;
396
397 /* Hairpin ports configuration mode. */
398 uint16_t hairpin_mode;
399
400 /* Pretty printing of ethdev events */
401 static const char * const eth_event_desc[] = {
402         [RTE_ETH_EVENT_UNKNOWN] = "unknown",
403         [RTE_ETH_EVENT_INTR_LSC] = "link state change",
404         [RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
405         [RTE_ETH_EVENT_INTR_RESET] = "reset",
406         [RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
407         [RTE_ETH_EVENT_IPSEC] = "IPsec",
408         [RTE_ETH_EVENT_MACSEC] = "MACsec",
409         [RTE_ETH_EVENT_INTR_RMV] = "device removal",
410         [RTE_ETH_EVENT_NEW] = "device probed",
411         [RTE_ETH_EVENT_DESTROY] = "device released",
412         [RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
413         [RTE_ETH_EVENT_MAX] = NULL,
414 };
415
416 /*
417  * Display or mask ether events
418  * Default to all events except VF_MBOX
419  */
420 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
421                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
422                             (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
423                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
424                             (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
425                             (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
426                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
427                             (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
428 /*
429  * Decide if all memory are locked for performance.
430  */
431 int do_mlockall = 0;
432
433 /*
434  * NIC bypass mode configuration options.
435  */
436
437 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
438 /* The NIC bypass watchdog timeout. */
439 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
440 #endif
441
442
443 #ifdef RTE_LIB_LATENCYSTATS
444
445 /*
446  * Set when latency stats is enabled in the commandline
447  */
448 uint8_t latencystats_enabled;
449
450 /*
451  * Lcore ID to serive latency statistics.
452  */
453 lcoreid_t latencystats_lcore_id = -1;
454
455 #endif
456
457 /*
458  * Ethernet device configuration.
459  */
460 struct rte_eth_rxmode rx_mode;
461
462 struct rte_eth_txmode tx_mode = {
463         .offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
464 };
465
466 struct rte_fdir_conf fdir_conf = {
467         .mode = RTE_FDIR_MODE_NONE,
468         .pballoc = RTE_FDIR_PBALLOC_64K,
469         .status = RTE_FDIR_REPORT_STATUS,
470         .mask = {
471                 .vlan_tci_mask = 0xFFEF,
472                 .ipv4_mask     = {
473                         .src_ip = 0xFFFFFFFF,
474                         .dst_ip = 0xFFFFFFFF,
475                 },
476                 .ipv6_mask     = {
477                         .src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
478                         .dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
479                 },
480                 .src_port_mask = 0xFFFF,
481                 .dst_port_mask = 0xFFFF,
482                 .mac_addr_byte_mask = 0xFF,
483                 .tunnel_type_mask = 1,
484                 .tunnel_id_mask = 0xFFFFFFFF,
485         },
486         .drop_queue = 127,
487 };
488
489 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
490
491 /*
492  * Display zero values by default for xstats
493  */
494 uint8_t xstats_hide_zero;
495
496 /*
497  * Measure of CPU cycles disabled by default
498  */
499 uint8_t record_core_cycles;
500
501 /*
502  * Display of RX and TX bursts disabled by default
503  */
504 uint8_t record_burst_stats;
505
506 /*
507  * Number of ports per shared Rx queue group, 0 disable.
508  */
509 uint32_t rxq_share;
510
511 unsigned int num_sockets = 0;
512 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
513
514 #ifdef RTE_LIB_BITRATESTATS
515 /* Bitrate statistics */
516 struct rte_stats_bitrates *bitrate_data;
517 lcoreid_t bitrate_lcore_id;
518 uint8_t bitrate_enabled;
519 #endif
520
521 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
522 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
523
524 /*
525  * hexadecimal bitmask of RX mq mode can be enabled.
526  */
527 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
528
529 /*
530  * Used to set forced link speed
531  */
532 uint32_t eth_link_speed;
533
534 /*
535  * ID of the current process in multi-process, used to
536  * configure the queues to be polled.
537  */
538 int proc_id;
539
540 /*
541  * Number of processes in multi-process, used to
542  * configure the queues to be polled.
543  */
544 unsigned int num_procs = 1;
545
546 static void
547 eth_rx_metadata_negotiate_mp(uint16_t port_id)
548 {
549         uint64_t rx_meta_features = 0;
550         int ret;
551
552         if (!is_proc_primary())
553                 return;
554
555         rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
556         rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
557         rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
558
559         ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
560         if (ret == 0) {
561                 if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
562                         TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
563                                     port_id);
564                 }
565
566                 if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
567                         TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
568                                     port_id);
569                 }
570
571                 if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
572                         TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
573                                     port_id);
574                 }
575         } else if (ret != -ENOTSUP) {
576                 rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
577                          port_id, rte_strerror(-ret));
578         }
579 }
580
581 static void
582 flow_pick_transfer_proxy_mp(uint16_t port_id)
583 {
584         struct rte_port *port = &ports[port_id];
585         int ret;
586
587         port->flow_transfer_proxy = port_id;
588
589         if (!is_proc_primary())
590                 return;
591
592         ret = rte_flow_pick_transfer_proxy(port_id, &port->flow_transfer_proxy,
593                                            NULL);
594         if (ret != 0) {
595                 fprintf(stderr, "Error picking flow transfer proxy for port %u: %s - ignore\n",
596                         port_id, rte_strerror(-ret));
597         }
598 }
599
600 static int
601 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
602                       const struct rte_eth_conf *dev_conf)
603 {
604         if (is_proc_primary())
605                 return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
606                                         dev_conf);
607         return 0;
608 }
609
610 static int
611 eth_dev_start_mp(uint16_t port_id)
612 {
613         if (is_proc_primary())
614                 return rte_eth_dev_start(port_id);
615
616         return 0;
617 }
618
619 static int
620 eth_dev_stop_mp(uint16_t port_id)
621 {
622         if (is_proc_primary())
623                 return rte_eth_dev_stop(port_id);
624
625         return 0;
626 }
627
628 static void
629 mempool_free_mp(struct rte_mempool *mp)
630 {
631         if (is_proc_primary())
632                 rte_mempool_free(mp);
633 }
634
635 static int
636 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
637 {
638         if (is_proc_primary())
639                 return rte_eth_dev_set_mtu(port_id, mtu);
640
641         return 0;
642 }
643
644 /* Forward function declarations */
645 static void setup_attached_port(portid_t pi);
646 static void check_all_ports_link_status(uint32_t port_mask);
647 static int eth_event_callback(portid_t port_id,
648                               enum rte_eth_event_type type,
649                               void *param, void *ret_param);
650 static void dev_event_callback(const char *device_name,
651                                 enum rte_dev_event_type type,
652                                 void *param);
653 static void fill_xstats_display_info(void);
654
655 /*
656  * Check if all the ports are started.
657  * If yes, return positive value. If not, return zero.
658  */
659 static int all_ports_started(void);
660
661 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
662 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
663
664 /* Holds the registered mbuf dynamic flags names. */
665 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
666
667
668 /*
669  * Helper function to check if socket is already discovered.
670  * If yes, return positive value. If not, return zero.
671  */
672 int
673 new_socket_id(unsigned int socket_id)
674 {
675         unsigned int i;
676
677         for (i = 0; i < num_sockets; i++) {
678                 if (socket_ids[i] == socket_id)
679                         return 0;
680         }
681         return 1;
682 }
683
684 /*
685  * Setup default configuration.
686  */
687 static void
688 set_default_fwd_lcores_config(void)
689 {
690         unsigned int i;
691         unsigned int nb_lc;
692         unsigned int sock_num;
693
694         nb_lc = 0;
695         for (i = 0; i < RTE_MAX_LCORE; i++) {
696                 if (!rte_lcore_is_enabled(i))
697                         continue;
698                 sock_num = rte_lcore_to_socket_id(i);
699                 if (new_socket_id(sock_num)) {
700                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
701                                 rte_exit(EXIT_FAILURE,
702                                          "Total sockets greater than %u\n",
703                                          RTE_MAX_NUMA_NODES);
704                         }
705                         socket_ids[num_sockets++] = sock_num;
706                 }
707                 if (i == rte_get_main_lcore())
708                         continue;
709                 fwd_lcores_cpuids[nb_lc++] = i;
710         }
711         nb_lcores = (lcoreid_t) nb_lc;
712         nb_cfg_lcores = nb_lcores;
713         nb_fwd_lcores = 1;
714 }
715
716 static void
717 set_def_peer_eth_addrs(void)
718 {
719         portid_t i;
720
721         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
722                 peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
723                 peer_eth_addrs[i].addr_bytes[5] = i;
724         }
725 }
726
727 static void
728 set_default_fwd_ports_config(void)
729 {
730         portid_t pt_id;
731         int i = 0;
732
733         RTE_ETH_FOREACH_DEV(pt_id) {
734                 fwd_ports_ids[i++] = pt_id;
735
736                 /* Update sockets info according to the attached device */
737                 int socket_id = rte_eth_dev_socket_id(pt_id);
738                 if (socket_id >= 0 && new_socket_id(socket_id)) {
739                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
740                                 rte_exit(EXIT_FAILURE,
741                                          "Total sockets greater than %u\n",
742                                          RTE_MAX_NUMA_NODES);
743                         }
744                         socket_ids[num_sockets++] = socket_id;
745                 }
746         }
747
748         nb_cfg_ports = nb_ports;
749         nb_fwd_ports = nb_ports;
750 }
751
752 void
753 set_def_fwd_config(void)
754 {
755         set_default_fwd_lcores_config();
756         set_def_peer_eth_addrs();
757         set_default_fwd_ports_config();
758 }
759
760 #ifndef RTE_EXEC_ENV_WINDOWS
761 /* extremely pessimistic estimation of memory required to create a mempool */
762 static int
763 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
764 {
765         unsigned int n_pages, mbuf_per_pg, leftover;
766         uint64_t total_mem, mbuf_mem, obj_sz;
767
768         /* there is no good way to predict how much space the mempool will
769          * occupy because it will allocate chunks on the fly, and some of those
770          * will come from default DPDK memory while some will come from our
771          * external memory, so just assume 128MB will be enough for everyone.
772          */
773         uint64_t hdr_mem = 128 << 20;
774
775         /* account for possible non-contiguousness */
776         obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
777         if (obj_sz > pgsz) {
778                 TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
779                 return -1;
780         }
781
782         mbuf_per_pg = pgsz / obj_sz;
783         leftover = (nb_mbufs % mbuf_per_pg) > 0;
784         n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
785
786         mbuf_mem = n_pages * pgsz;
787
788         total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
789
790         if (total_mem > SIZE_MAX) {
791                 TESTPMD_LOG(ERR, "Memory size too big\n");
792                 return -1;
793         }
794         *out = (size_t)total_mem;
795
796         return 0;
797 }
798
799 static int
800 pagesz_flags(uint64_t page_sz)
801 {
802         /* as per mmap() manpage, all page sizes are log2 of page size
803          * shifted by MAP_HUGE_SHIFT
804          */
805         int log2 = rte_log2_u64(page_sz);
806
807         return (log2 << HUGE_SHIFT);
808 }
809
810 static void *
811 alloc_mem(size_t memsz, size_t pgsz, bool huge)
812 {
813         void *addr;
814         int flags;
815
816         /* allocate anonymous hugepages */
817         flags = MAP_ANONYMOUS | MAP_PRIVATE;
818         if (huge)
819                 flags |= HUGE_FLAG | pagesz_flags(pgsz);
820
821         addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
822         if (addr == MAP_FAILED)
823                 return NULL;
824
825         return addr;
826 }
827
828 struct extmem_param {
829         void *addr;
830         size_t len;
831         size_t pgsz;
832         rte_iova_t *iova_table;
833         unsigned int iova_table_len;
834 };
835
836 static int
837 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
838                 bool huge)
839 {
840         uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
841                         RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
842         unsigned int cur_page, n_pages, pgsz_idx;
843         size_t mem_sz, cur_pgsz;
844         rte_iova_t *iovas = NULL;
845         void *addr;
846         int ret;
847
848         for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
849                 /* skip anything that is too big */
850                 if (pgsizes[pgsz_idx] > SIZE_MAX)
851                         continue;
852
853                 cur_pgsz = pgsizes[pgsz_idx];
854
855                 /* if we were told not to allocate hugepages, override */
856                 if (!huge)
857                         cur_pgsz = sysconf(_SC_PAGESIZE);
858
859                 ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
860                 if (ret < 0) {
861                         TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
862                         return -1;
863                 }
864
865                 /* allocate our memory */
866                 addr = alloc_mem(mem_sz, cur_pgsz, huge);
867
868                 /* if we couldn't allocate memory with a specified page size,
869                  * that doesn't mean we can't do it with other page sizes, so
870                  * try another one.
871                  */
872                 if (addr == NULL)
873                         continue;
874
875                 /* store IOVA addresses for every page in this memory area */
876                 n_pages = mem_sz / cur_pgsz;
877
878                 iovas = malloc(sizeof(*iovas) * n_pages);
879
880                 if (iovas == NULL) {
881                         TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
882                         goto fail;
883                 }
884                 /* lock memory if it's not huge pages */
885                 if (!huge)
886                         mlock(addr, mem_sz);
887
888                 /* populate IOVA addresses */
889                 for (cur_page = 0; cur_page < n_pages; cur_page++) {
890                         rte_iova_t iova;
891                         size_t offset;
892                         void *cur;
893
894                         offset = cur_pgsz * cur_page;
895                         cur = RTE_PTR_ADD(addr, offset);
896
897                         /* touch the page before getting its IOVA */
898                         *(volatile char *)cur = 0;
899
900                         iova = rte_mem_virt2iova(cur);
901
902                         iovas[cur_page] = iova;
903                 }
904
905                 break;
906         }
907         /* if we couldn't allocate anything */
908         if (iovas == NULL)
909                 return -1;
910
911         param->addr = addr;
912         param->len = mem_sz;
913         param->pgsz = cur_pgsz;
914         param->iova_table = iovas;
915         param->iova_table_len = n_pages;
916
917         return 0;
918 fail:
919         if (iovas)
920                 free(iovas);
921         if (addr)
922                 munmap(addr, mem_sz);
923
924         return -1;
925 }
926
927 static int
928 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
929 {
930         struct extmem_param param;
931         int socket_id, ret;
932
933         memset(&param, 0, sizeof(param));
934
935         /* check if our heap exists */
936         socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
937         if (socket_id < 0) {
938                 /* create our heap */
939                 ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
940                 if (ret < 0) {
941                         TESTPMD_LOG(ERR, "Cannot create heap\n");
942                         return -1;
943                 }
944         }
945
946         ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
947         if (ret < 0) {
948                 TESTPMD_LOG(ERR, "Cannot create memory area\n");
949                 return -1;
950         }
951
952         /* we now have a valid memory area, so add it to heap */
953         ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
954                         param.addr, param.len, param.iova_table,
955                         param.iova_table_len, param.pgsz);
956
957         /* when using VFIO, memory is automatically mapped for DMA by EAL */
958
959         /* not needed any more */
960         free(param.iova_table);
961
962         if (ret < 0) {
963                 TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
964                 munmap(param.addr, param.len);
965                 return -1;
966         }
967
968         /* success */
969
970         TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
971                         param.len >> 20);
972
973         return 0;
974 }
975 static void
976 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
977              struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
978 {
979         uint16_t pid = 0;
980         int ret;
981
982         RTE_ETH_FOREACH_DEV(pid) {
983                 struct rte_eth_dev_info dev_info;
984
985                 ret = eth_dev_info_get_print_err(pid, &dev_info);
986                 if (ret != 0) {
987                         TESTPMD_LOG(DEBUG,
988                                     "unable to get device info for port %d on addr 0x%p,"
989                                     "mempool unmapping will not be performed\n",
990                                     pid, memhdr->addr);
991                         continue;
992                 }
993
994                 ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
995                 if (ret) {
996                         TESTPMD_LOG(DEBUG,
997                                     "unable to DMA unmap addr 0x%p "
998                                     "for device %s\n",
999                                     memhdr->addr, dev_info.device->name);
1000                 }
1001         }
1002         ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
1003         if (ret) {
1004                 TESTPMD_LOG(DEBUG,
1005                             "unable to un-register addr 0x%p\n", memhdr->addr);
1006         }
1007 }
1008
1009 static void
1010 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1011            struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1012 {
1013         uint16_t pid = 0;
1014         size_t page_size = sysconf(_SC_PAGESIZE);
1015         int ret;
1016
1017         ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1018                                   page_size);
1019         if (ret) {
1020                 TESTPMD_LOG(DEBUG,
1021                             "unable to register addr 0x%p\n", memhdr->addr);
1022                 return;
1023         }
1024         RTE_ETH_FOREACH_DEV(pid) {
1025                 struct rte_eth_dev_info dev_info;
1026
1027                 ret = eth_dev_info_get_print_err(pid, &dev_info);
1028                 if (ret != 0) {
1029                         TESTPMD_LOG(DEBUG,
1030                                     "unable to get device info for port %d on addr 0x%p,"
1031                                     "mempool mapping will not be performed\n",
1032                                     pid, memhdr->addr);
1033                         continue;
1034                 }
1035                 ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1036                 if (ret) {
1037                         TESTPMD_LOG(DEBUG,
1038                                     "unable to DMA map addr 0x%p "
1039                                     "for device %s\n",
1040                                     memhdr->addr, dev_info.device->name);
1041                 }
1042         }
1043 }
1044 #endif
1045
1046 static unsigned int
1047 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1048             char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1049 {
1050         struct rte_pktmbuf_extmem *xmem;
1051         unsigned int ext_num, zone_num, elt_num;
1052         uint16_t elt_size;
1053
1054         elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1055         elt_num = EXTBUF_ZONE_SIZE / elt_size;
1056         zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1057
1058         xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1059         if (xmem == NULL) {
1060                 TESTPMD_LOG(ERR, "Cannot allocate memory for "
1061                                  "external buffer descriptors\n");
1062                 *ext_mem = NULL;
1063                 return 0;
1064         }
1065         for (ext_num = 0; ext_num < zone_num; ext_num++) {
1066                 struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1067                 const struct rte_memzone *mz;
1068                 char mz_name[RTE_MEMZONE_NAMESIZE];
1069                 int ret;
1070
1071                 ret = snprintf(mz_name, sizeof(mz_name),
1072                         RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1073                 if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1074                         errno = ENAMETOOLONG;
1075                         ext_num = 0;
1076                         break;
1077                 }
1078                 mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
1079                                                  socket_id,
1080                                                  RTE_MEMZONE_IOVA_CONTIG |
1081                                                  RTE_MEMZONE_1GB |
1082                                                  RTE_MEMZONE_SIZE_HINT_ONLY,
1083                                                  EXTBUF_ZONE_SIZE);
1084                 if (mz == NULL) {
1085                         /*
1086                          * The caller exits on external buffer creation
1087                          * error, so there is no need to free memzones.
1088                          */
1089                         errno = ENOMEM;
1090                         ext_num = 0;
1091                         break;
1092                 }
1093                 xseg->buf_ptr = mz->addr;
1094                 xseg->buf_iova = mz->iova;
1095                 xseg->buf_len = EXTBUF_ZONE_SIZE;
1096                 xseg->elt_size = elt_size;
1097         }
1098         if (ext_num == 0 && xmem != NULL) {
1099                 free(xmem);
1100                 xmem = NULL;
1101         }
1102         *ext_mem = xmem;
1103         return ext_num;
1104 }
1105
1106 /*
1107  * Configuration initialisation done once at init time.
1108  */
1109 static struct rte_mempool *
1110 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1111                  unsigned int socket_id, uint16_t size_idx)
1112 {
1113         char pool_name[RTE_MEMPOOL_NAMESIZE];
1114         struct rte_mempool *rte_mp = NULL;
1115 #ifndef RTE_EXEC_ENV_WINDOWS
1116         uint32_t mb_size;
1117
1118         mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1119 #endif
1120         mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1121         if (!is_proc_primary()) {
1122                 rte_mp = rte_mempool_lookup(pool_name);
1123                 if (rte_mp == NULL)
1124                         rte_exit(EXIT_FAILURE,
1125                                 "Get mbuf pool for socket %u failed: %s\n",
1126                                 socket_id, rte_strerror(rte_errno));
1127                 return rte_mp;
1128         }
1129
1130         TESTPMD_LOG(INFO,
1131                 "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1132                 pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1133
1134         switch (mp_alloc_type) {
1135         case MP_ALLOC_NATIVE:
1136                 {
1137                         /* wrapper to rte_mempool_create() */
1138                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1139                                         rte_mbuf_best_mempool_ops());
1140                         rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1141                                 mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1142                         break;
1143                 }
1144 #ifndef RTE_EXEC_ENV_WINDOWS
1145         case MP_ALLOC_ANON:
1146                 {
1147                         rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1148                                 mb_size, (unsigned int) mb_mempool_cache,
1149                                 sizeof(struct rte_pktmbuf_pool_private),
1150                                 socket_id, mempool_flags);
1151                         if (rte_mp == NULL)
1152                                 goto err;
1153
1154                         if (rte_mempool_populate_anon(rte_mp) == 0) {
1155                                 rte_mempool_free(rte_mp);
1156                                 rte_mp = NULL;
1157                                 goto err;
1158                         }
1159                         rte_pktmbuf_pool_init(rte_mp, NULL);
1160                         rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1161                         rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1162                         break;
1163                 }
1164         case MP_ALLOC_XMEM:
1165         case MP_ALLOC_XMEM_HUGE:
1166                 {
1167                         int heap_socket;
1168                         bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1169
1170                         if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1171                                 rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1172
1173                         heap_socket =
1174                                 rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1175                         if (heap_socket < 0)
1176                                 rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1177
1178                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1179                                         rte_mbuf_best_mempool_ops());
1180                         rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1181                                         mb_mempool_cache, 0, mbuf_seg_size,
1182                                         heap_socket);
1183                         break;
1184                 }
1185 #endif
1186         case MP_ALLOC_XBUF:
1187                 {
1188                         struct rte_pktmbuf_extmem *ext_mem;
1189                         unsigned int ext_num;
1190
1191                         ext_num = setup_extbuf(nb_mbuf, mbuf_seg_size,
1192                                                socket_id, pool_name, &ext_mem);
1193                         if (ext_num == 0)
1194                                 rte_exit(EXIT_FAILURE,
1195                                          "Can't create pinned data buffers\n");
1196
1197                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1198                                         rte_mbuf_best_mempool_ops());
1199                         rte_mp = rte_pktmbuf_pool_create_extbuf
1200                                         (pool_name, nb_mbuf, mb_mempool_cache,
1201                                          0, mbuf_seg_size, socket_id,
1202                                          ext_mem, ext_num);
1203                         free(ext_mem);
1204                         break;
1205                 }
1206         default:
1207                 {
1208                         rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1209                 }
1210         }
1211
1212 #ifndef RTE_EXEC_ENV_WINDOWS
1213 err:
1214 #endif
1215         if (rte_mp == NULL) {
1216                 rte_exit(EXIT_FAILURE,
1217                         "Creation of mbuf pool for socket %u failed: %s\n",
1218                         socket_id, rte_strerror(rte_errno));
1219         } else if (verbose_level > 0) {
1220                 rte_mempool_dump(stdout, rte_mp);
1221         }
1222         return rte_mp;
1223 }
1224
1225 /*
1226  * Check given socket id is valid or not with NUMA mode,
1227  * if valid, return 0, else return -1
1228  */
1229 static int
1230 check_socket_id(const unsigned int socket_id)
1231 {
1232         static int warning_once = 0;
1233
1234         if (new_socket_id(socket_id)) {
1235                 if (!warning_once && numa_support)
1236                         fprintf(stderr,
1237                                 "Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1238                 warning_once = 1;
1239                 return -1;
1240         }
1241         return 0;
1242 }
1243
1244 /*
1245  * Get the allowed maximum number of RX queues.
1246  * *pid return the port id which has minimal value of
1247  * max_rx_queues in all ports.
1248  */
1249 queueid_t
1250 get_allowed_max_nb_rxq(portid_t *pid)
1251 {
1252         queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1253         bool max_rxq_valid = false;
1254         portid_t pi;
1255         struct rte_eth_dev_info dev_info;
1256
1257         RTE_ETH_FOREACH_DEV(pi) {
1258                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1259                         continue;
1260
1261                 max_rxq_valid = true;
1262                 if (dev_info.max_rx_queues < allowed_max_rxq) {
1263                         allowed_max_rxq = dev_info.max_rx_queues;
1264                         *pid = pi;
1265                 }
1266         }
1267         return max_rxq_valid ? allowed_max_rxq : 0;
1268 }
1269
1270 /*
1271  * Check input rxq is valid or not.
1272  * If input rxq is not greater than any of maximum number
1273  * of RX queues of all ports, it is valid.
1274  * if valid, return 0, else return -1
1275  */
1276 int
1277 check_nb_rxq(queueid_t rxq)
1278 {
1279         queueid_t allowed_max_rxq;
1280         portid_t pid = 0;
1281
1282         allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1283         if (rxq > allowed_max_rxq) {
1284                 fprintf(stderr,
1285                         "Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1286                         rxq, allowed_max_rxq, pid);
1287                 return -1;
1288         }
1289         return 0;
1290 }
1291
1292 /*
1293  * Get the allowed maximum number of TX queues.
1294  * *pid return the port id which has minimal value of
1295  * max_tx_queues in all ports.
1296  */
1297 queueid_t
1298 get_allowed_max_nb_txq(portid_t *pid)
1299 {
1300         queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1301         bool max_txq_valid = false;
1302         portid_t pi;
1303         struct rte_eth_dev_info dev_info;
1304
1305         RTE_ETH_FOREACH_DEV(pi) {
1306                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1307                         continue;
1308
1309                 max_txq_valid = true;
1310                 if (dev_info.max_tx_queues < allowed_max_txq) {
1311                         allowed_max_txq = dev_info.max_tx_queues;
1312                         *pid = pi;
1313                 }
1314         }
1315         return max_txq_valid ? allowed_max_txq : 0;
1316 }
1317
1318 /*
1319  * Check input txq is valid or not.
1320  * If input txq is not greater than any of maximum number
1321  * of TX queues of all ports, it is valid.
1322  * if valid, return 0, else return -1
1323  */
1324 int
1325 check_nb_txq(queueid_t txq)
1326 {
1327         queueid_t allowed_max_txq;
1328         portid_t pid = 0;
1329
1330         allowed_max_txq = get_allowed_max_nb_txq(&pid);
1331         if (txq > allowed_max_txq) {
1332                 fprintf(stderr,
1333                         "Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1334                         txq, allowed_max_txq, pid);
1335                 return -1;
1336         }
1337         return 0;
1338 }
1339
1340 /*
1341  * Get the allowed maximum number of RXDs of every rx queue.
1342  * *pid return the port id which has minimal value of
1343  * max_rxd in all queues of all ports.
1344  */
1345 static uint16_t
1346 get_allowed_max_nb_rxd(portid_t *pid)
1347 {
1348         uint16_t allowed_max_rxd = UINT16_MAX;
1349         portid_t pi;
1350         struct rte_eth_dev_info dev_info;
1351
1352         RTE_ETH_FOREACH_DEV(pi) {
1353                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1354                         continue;
1355
1356                 if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1357                         allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1358                         *pid = pi;
1359                 }
1360         }
1361         return allowed_max_rxd;
1362 }
1363
1364 /*
1365  * Get the allowed minimal number of RXDs of every rx queue.
1366  * *pid return the port id which has minimal value of
1367  * min_rxd in all queues of all ports.
1368  */
1369 static uint16_t
1370 get_allowed_min_nb_rxd(portid_t *pid)
1371 {
1372         uint16_t allowed_min_rxd = 0;
1373         portid_t pi;
1374         struct rte_eth_dev_info dev_info;
1375
1376         RTE_ETH_FOREACH_DEV(pi) {
1377                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1378                         continue;
1379
1380                 if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1381                         allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1382                         *pid = pi;
1383                 }
1384         }
1385
1386         return allowed_min_rxd;
1387 }
1388
1389 /*
1390  * Check input rxd is valid or not.
1391  * If input rxd is not greater than any of maximum number
1392  * of RXDs of every Rx queues and is not less than any of
1393  * minimal number of RXDs of every Rx queues, it is valid.
1394  * if valid, return 0, else return -1
1395  */
1396 int
1397 check_nb_rxd(queueid_t rxd)
1398 {
1399         uint16_t allowed_max_rxd;
1400         uint16_t allowed_min_rxd;
1401         portid_t pid = 0;
1402
1403         allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1404         if (rxd > allowed_max_rxd) {
1405                 fprintf(stderr,
1406                         "Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1407                         rxd, allowed_max_rxd, pid);
1408                 return -1;
1409         }
1410
1411         allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1412         if (rxd < allowed_min_rxd) {
1413                 fprintf(stderr,
1414                         "Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1415                         rxd, allowed_min_rxd, pid);
1416                 return -1;
1417         }
1418
1419         return 0;
1420 }
1421
1422 /*
1423  * Get the allowed maximum number of TXDs of every rx queues.
1424  * *pid return the port id which has minimal value of
1425  * max_txd in every tx queue.
1426  */
1427 static uint16_t
1428 get_allowed_max_nb_txd(portid_t *pid)
1429 {
1430         uint16_t allowed_max_txd = UINT16_MAX;
1431         portid_t pi;
1432         struct rte_eth_dev_info dev_info;
1433
1434         RTE_ETH_FOREACH_DEV(pi) {
1435                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1436                         continue;
1437
1438                 if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1439                         allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1440                         *pid = pi;
1441                 }
1442         }
1443         return allowed_max_txd;
1444 }
1445
1446 /*
1447  * Get the allowed maximum number of TXDs of every tx queues.
1448  * *pid return the port id which has minimal value of
1449  * min_txd in every tx queue.
1450  */
1451 static uint16_t
1452 get_allowed_min_nb_txd(portid_t *pid)
1453 {
1454         uint16_t allowed_min_txd = 0;
1455         portid_t pi;
1456         struct rte_eth_dev_info dev_info;
1457
1458         RTE_ETH_FOREACH_DEV(pi) {
1459                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1460                         continue;
1461
1462                 if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1463                         allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1464                         *pid = pi;
1465                 }
1466         }
1467
1468         return allowed_min_txd;
1469 }
1470
1471 /*
1472  * Check input txd is valid or not.
1473  * If input txd is not greater than any of maximum number
1474  * of TXDs of every Rx queues, it is valid.
1475  * if valid, return 0, else return -1
1476  */
1477 int
1478 check_nb_txd(queueid_t txd)
1479 {
1480         uint16_t allowed_max_txd;
1481         uint16_t allowed_min_txd;
1482         portid_t pid = 0;
1483
1484         allowed_max_txd = get_allowed_max_nb_txd(&pid);
1485         if (txd > allowed_max_txd) {
1486                 fprintf(stderr,
1487                         "Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1488                         txd, allowed_max_txd, pid);
1489                 return -1;
1490         }
1491
1492         allowed_min_txd = get_allowed_min_nb_txd(&pid);
1493         if (txd < allowed_min_txd) {
1494                 fprintf(stderr,
1495                         "Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1496                         txd, allowed_min_txd, pid);
1497                 return -1;
1498         }
1499         return 0;
1500 }
1501
1502
1503 /*
1504  * Get the allowed maximum number of hairpin queues.
1505  * *pid return the port id which has minimal value of
1506  * max_hairpin_queues in all ports.
1507  */
1508 queueid_t
1509 get_allowed_max_nb_hairpinq(portid_t *pid)
1510 {
1511         queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1512         portid_t pi;
1513         struct rte_eth_hairpin_cap cap;
1514
1515         RTE_ETH_FOREACH_DEV(pi) {
1516                 if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1517                         *pid = pi;
1518                         return 0;
1519                 }
1520                 if (cap.max_nb_queues < allowed_max_hairpinq) {
1521                         allowed_max_hairpinq = cap.max_nb_queues;
1522                         *pid = pi;
1523                 }
1524         }
1525         return allowed_max_hairpinq;
1526 }
1527
1528 /*
1529  * Check input hairpin is valid or not.
1530  * If input hairpin is not greater than any of maximum number
1531  * of hairpin queues of all ports, it is valid.
1532  * if valid, return 0, else return -1
1533  */
1534 int
1535 check_nb_hairpinq(queueid_t hairpinq)
1536 {
1537         queueid_t allowed_max_hairpinq;
1538         portid_t pid = 0;
1539
1540         allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1541         if (hairpinq > allowed_max_hairpinq) {
1542                 fprintf(stderr,
1543                         "Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1544                         hairpinq, allowed_max_hairpinq, pid);
1545                 return -1;
1546         }
1547         return 0;
1548 }
1549
1550 static int
1551 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1552 {
1553         uint32_t eth_overhead;
1554
1555         if (dev_info->max_mtu != UINT16_MAX &&
1556             dev_info->max_rx_pktlen > dev_info->max_mtu)
1557                 eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1558         else
1559                 eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1560
1561         return eth_overhead;
1562 }
1563
1564 static void
1565 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1566 {
1567         struct rte_port *port = &ports[pid];
1568         int ret;
1569         int i;
1570
1571         eth_rx_metadata_negotiate_mp(pid);
1572         flow_pick_transfer_proxy_mp(pid);
1573
1574         port->dev_conf.txmode = tx_mode;
1575         port->dev_conf.rxmode = rx_mode;
1576
1577         ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1578         if (ret != 0)
1579                 rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1580
1581         if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1582                 port->dev_conf.txmode.offloads &=
1583                         ~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1584
1585         /* Apply Rx offloads configuration */
1586         for (i = 0; i < port->dev_info.max_rx_queues; i++)
1587                 port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1588         /* Apply Tx offloads configuration */
1589         for (i = 0; i < port->dev_info.max_tx_queues; i++)
1590                 port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1591
1592         if (eth_link_speed)
1593                 port->dev_conf.link_speeds = eth_link_speed;
1594
1595         if (max_rx_pkt_len)
1596                 port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1597                         get_eth_overhead(&port->dev_info);
1598
1599         /* set flag to initialize port/queue */
1600         port->need_reconfig = 1;
1601         port->need_reconfig_queues = 1;
1602         port->socket_id = socket_id;
1603         port->tx_metadata = 0;
1604
1605         /*
1606          * Check for maximum number of segments per MTU.
1607          * Accordingly update the mbuf data size.
1608          */
1609         if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1610             port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1611                 uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1612                 uint16_t mtu;
1613
1614                 if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1615                         uint16_t data_size = (mtu + eth_overhead) /
1616                                 port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1617                         uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1618
1619                         if (buffer_size > mbuf_data_size[0]) {
1620                                 mbuf_data_size[0] = buffer_size;
1621                                 TESTPMD_LOG(WARNING,
1622                                         "Configured mbuf size of the first segment %hu\n",
1623                                         mbuf_data_size[0]);
1624                         }
1625                 }
1626         }
1627 }
1628
1629 static void
1630 init_config(void)
1631 {
1632         portid_t pid;
1633         struct rte_mempool *mbp;
1634         unsigned int nb_mbuf_per_pool;
1635         lcoreid_t  lc_id;
1636         struct rte_gro_param gro_param;
1637         uint32_t gso_types;
1638
1639         /* Configuration of logical cores. */
1640         fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1641                                 sizeof(struct fwd_lcore *) * nb_lcores,
1642                                 RTE_CACHE_LINE_SIZE);
1643         if (fwd_lcores == NULL) {
1644                 rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1645                                                         "failed\n", nb_lcores);
1646         }
1647         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1648                 fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1649                                                sizeof(struct fwd_lcore),
1650                                                RTE_CACHE_LINE_SIZE);
1651                 if (fwd_lcores[lc_id] == NULL) {
1652                         rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1653                                                                 "failed\n");
1654                 }
1655                 fwd_lcores[lc_id]->cpuid_idx = lc_id;
1656         }
1657
1658         RTE_ETH_FOREACH_DEV(pid) {
1659                 uint32_t socket_id;
1660
1661                 if (numa_support) {
1662                         socket_id = port_numa[pid];
1663                         if (port_numa[pid] == NUMA_NO_CONFIG) {
1664                                 socket_id = rte_eth_dev_socket_id(pid);
1665
1666                                 /*
1667                                  * if socket_id is invalid,
1668                                  * set to the first available socket.
1669                                  */
1670                                 if (check_socket_id(socket_id) < 0)
1671                                         socket_id = socket_ids[0];
1672                         }
1673                 } else {
1674                         socket_id = (socket_num == UMA_NO_CONFIG) ?
1675                                     0 : socket_num;
1676                 }
1677                 /* Apply default TxRx configuration for all ports */
1678                 init_config_port_offloads(pid, socket_id);
1679         }
1680         /*
1681          * Create pools of mbuf.
1682          * If NUMA support is disabled, create a single pool of mbuf in
1683          * socket 0 memory by default.
1684          * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1685          *
1686          * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1687          * nb_txd can be configured at run time.
1688          */
1689         if (param_total_num_mbufs)
1690                 nb_mbuf_per_pool = param_total_num_mbufs;
1691         else {
1692                 nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1693                         (nb_lcores * mb_mempool_cache) +
1694                         RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1695                 nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1696         }
1697
1698         if (numa_support) {
1699                 uint8_t i, j;
1700
1701                 for (i = 0; i < num_sockets; i++)
1702                         for (j = 0; j < mbuf_data_size_n; j++)
1703                                 mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1704                                         mbuf_pool_create(mbuf_data_size[j],
1705                                                           nb_mbuf_per_pool,
1706                                                           socket_ids[i], j);
1707         } else {
1708                 uint8_t i;
1709
1710                 for (i = 0; i < mbuf_data_size_n; i++)
1711                         mempools[i] = mbuf_pool_create
1712                                         (mbuf_data_size[i],
1713                                          nb_mbuf_per_pool,
1714                                          socket_num == UMA_NO_CONFIG ?
1715                                          0 : socket_num, i);
1716         }
1717
1718         init_port_config();
1719
1720         gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1721                 DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1722         /*
1723          * Records which Mbuf pool to use by each logical core, if needed.
1724          */
1725         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1726                 mbp = mbuf_pool_find(
1727                         rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1728
1729                 if (mbp == NULL)
1730                         mbp = mbuf_pool_find(0, 0);
1731                 fwd_lcores[lc_id]->mbp = mbp;
1732                 /* initialize GSO context */
1733                 fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1734                 fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1735                 fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1736                 fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1737                         RTE_ETHER_CRC_LEN;
1738                 fwd_lcores[lc_id]->gso_ctx.flag = 0;
1739         }
1740
1741         fwd_config_setup();
1742
1743         /* create a gro context for each lcore */
1744         gro_param.gro_types = RTE_GRO_TCP_IPV4;
1745         gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1746         gro_param.max_item_per_flow = MAX_PKT_BURST;
1747         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1748                 gro_param.socket_id = rte_lcore_to_socket_id(
1749                                 fwd_lcores_cpuids[lc_id]);
1750                 fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1751                 if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1752                         rte_exit(EXIT_FAILURE,
1753                                         "rte_gro_ctx_create() failed\n");
1754                 }
1755         }
1756 }
1757
1758
1759 void
1760 reconfig(portid_t new_port_id, unsigned socket_id)
1761 {
1762         /* Reconfiguration of Ethernet ports. */
1763         init_config_port_offloads(new_port_id, socket_id);
1764         init_port_config();
1765 }
1766
1767
1768 int
1769 init_fwd_streams(void)
1770 {
1771         portid_t pid;
1772         struct rte_port *port;
1773         streamid_t sm_id, nb_fwd_streams_new;
1774         queueid_t q;
1775
1776         /* set socket id according to numa or not */
1777         RTE_ETH_FOREACH_DEV(pid) {
1778                 port = &ports[pid];
1779                 if (nb_rxq > port->dev_info.max_rx_queues) {
1780                         fprintf(stderr,
1781                                 "Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1782                                 nb_rxq, port->dev_info.max_rx_queues);
1783                         return -1;
1784                 }
1785                 if (nb_txq > port->dev_info.max_tx_queues) {
1786                         fprintf(stderr,
1787                                 "Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1788                                 nb_txq, port->dev_info.max_tx_queues);
1789                         return -1;
1790                 }
1791                 if (numa_support) {
1792                         if (port_numa[pid] != NUMA_NO_CONFIG)
1793                                 port->socket_id = port_numa[pid];
1794                         else {
1795                                 port->socket_id = rte_eth_dev_socket_id(pid);
1796
1797                                 /*
1798                                  * if socket_id is invalid,
1799                                  * set to the first available socket.
1800                                  */
1801                                 if (check_socket_id(port->socket_id) < 0)
1802                                         port->socket_id = socket_ids[0];
1803                         }
1804                 }
1805                 else {
1806                         if (socket_num == UMA_NO_CONFIG)
1807                                 port->socket_id = 0;
1808                         else
1809                                 port->socket_id = socket_num;
1810                 }
1811         }
1812
1813         q = RTE_MAX(nb_rxq, nb_txq);
1814         if (q == 0) {
1815                 fprintf(stderr,
1816                         "Fail: Cannot allocate fwd streams as number of queues is 0\n");
1817                 return -1;
1818         }
1819         nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1820         if (nb_fwd_streams_new == nb_fwd_streams)
1821                 return 0;
1822         /* clear the old */
1823         if (fwd_streams != NULL) {
1824                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1825                         if (fwd_streams[sm_id] == NULL)
1826                                 continue;
1827                         rte_free(fwd_streams[sm_id]);
1828                         fwd_streams[sm_id] = NULL;
1829                 }
1830                 rte_free(fwd_streams);
1831                 fwd_streams = NULL;
1832         }
1833
1834         /* init new */
1835         nb_fwd_streams = nb_fwd_streams_new;
1836         if (nb_fwd_streams) {
1837                 fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1838                         sizeof(struct fwd_stream *) * nb_fwd_streams,
1839                         RTE_CACHE_LINE_SIZE);
1840                 if (fwd_streams == NULL)
1841                         rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1842                                  " (struct fwd_stream *)) failed\n",
1843                                  nb_fwd_streams);
1844
1845                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1846                         fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1847                                 " struct fwd_stream", sizeof(struct fwd_stream),
1848                                 RTE_CACHE_LINE_SIZE);
1849                         if (fwd_streams[sm_id] == NULL)
1850                                 rte_exit(EXIT_FAILURE, "rte_zmalloc"
1851                                          "(struct fwd_stream) failed\n");
1852                 }
1853         }
1854
1855         return 0;
1856 }
1857
1858 static void
1859 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1860 {
1861         uint64_t total_burst, sburst;
1862         uint64_t nb_burst;
1863         uint64_t burst_stats[4];
1864         uint16_t pktnb_stats[4];
1865         uint16_t nb_pkt;
1866         int burst_percent[4], sburstp;
1867         int i;
1868
1869         /*
1870          * First compute the total number of packet bursts and the
1871          * two highest numbers of bursts of the same number of packets.
1872          */
1873         memset(&burst_stats, 0x0, sizeof(burst_stats));
1874         memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1875
1876         /* Show stats for 0 burst size always */
1877         total_burst = pbs->pkt_burst_spread[0];
1878         burst_stats[0] = pbs->pkt_burst_spread[0];
1879         pktnb_stats[0] = 0;
1880
1881         /* Find the next 2 burst sizes with highest occurrences. */
1882         for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1883                 nb_burst = pbs->pkt_burst_spread[nb_pkt];
1884
1885                 if (nb_burst == 0)
1886                         continue;
1887
1888                 total_burst += nb_burst;
1889
1890                 if (nb_burst > burst_stats[1]) {
1891                         burst_stats[2] = burst_stats[1];
1892                         pktnb_stats[2] = pktnb_stats[1];
1893                         burst_stats[1] = nb_burst;
1894                         pktnb_stats[1] = nb_pkt;
1895                 } else if (nb_burst > burst_stats[2]) {
1896                         burst_stats[2] = nb_burst;
1897                         pktnb_stats[2] = nb_pkt;
1898                 }
1899         }
1900         if (total_burst == 0)
1901                 return;
1902
1903         printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1904         for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1905                 if (i == 3) {
1906                         printf("%d%% of other]\n", 100 - sburstp);
1907                         return;
1908                 }
1909
1910                 sburst += burst_stats[i];
1911                 if (sburst == total_burst) {
1912                         printf("%d%% of %d pkts]\n",
1913                                 100 - sburstp, (int) pktnb_stats[i]);
1914                         return;
1915                 }
1916
1917                 burst_percent[i] =
1918                         (double)burst_stats[i] / total_burst * 100;
1919                 printf("%d%% of %d pkts + ",
1920                         burst_percent[i], (int) pktnb_stats[i]);
1921                 sburstp += burst_percent[i];
1922         }
1923 }
1924
1925 static void
1926 fwd_stream_stats_display(streamid_t stream_id)
1927 {
1928         struct fwd_stream *fs;
1929         static const char *fwd_top_stats_border = "-------";
1930
1931         fs = fwd_streams[stream_id];
1932         if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1933             (fs->fwd_dropped == 0))
1934                 return;
1935         printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1936                "TX Port=%2d/Queue=%2d %s\n",
1937                fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1938                fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1939         printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1940                " TX-dropped: %-14"PRIu64,
1941                fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1942
1943         /* if checksum mode */
1944         if (cur_fwd_eng == &csum_fwd_engine) {
1945                 printf("  RX- bad IP checksum: %-14"PRIu64
1946                        "  Rx- bad L4 checksum: %-14"PRIu64
1947                        " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1948                         fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1949                         fs->rx_bad_outer_l4_csum);
1950                 printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1951                         fs->rx_bad_outer_ip_csum);
1952         } else {
1953                 printf("\n");
1954         }
1955
1956         if (record_burst_stats) {
1957                 pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1958                 pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1959         }
1960 }
1961
1962 void
1963 fwd_stats_display(void)
1964 {
1965         static const char *fwd_stats_border = "----------------------";
1966         static const char *acc_stats_border = "+++++++++++++++";
1967         struct {
1968                 struct fwd_stream *rx_stream;
1969                 struct fwd_stream *tx_stream;
1970                 uint64_t tx_dropped;
1971                 uint64_t rx_bad_ip_csum;
1972                 uint64_t rx_bad_l4_csum;
1973                 uint64_t rx_bad_outer_l4_csum;
1974                 uint64_t rx_bad_outer_ip_csum;
1975         } ports_stats[RTE_MAX_ETHPORTS];
1976         uint64_t total_rx_dropped = 0;
1977         uint64_t total_tx_dropped = 0;
1978         uint64_t total_rx_nombuf = 0;
1979         struct rte_eth_stats stats;
1980         uint64_t fwd_cycles = 0;
1981         uint64_t total_recv = 0;
1982         uint64_t total_xmit = 0;
1983         struct rte_port *port;
1984         streamid_t sm_id;
1985         portid_t pt_id;
1986         int i;
1987
1988         memset(ports_stats, 0, sizeof(ports_stats));
1989
1990         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1991                 struct fwd_stream *fs = fwd_streams[sm_id];
1992
1993                 if (cur_fwd_config.nb_fwd_streams >
1994                     cur_fwd_config.nb_fwd_ports) {
1995                         fwd_stream_stats_display(sm_id);
1996                 } else {
1997                         ports_stats[fs->tx_port].tx_stream = fs;
1998                         ports_stats[fs->rx_port].rx_stream = fs;
1999                 }
2000
2001                 ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2002
2003                 ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2004                 ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2005                 ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2006                                 fs->rx_bad_outer_l4_csum;
2007                 ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2008                                 fs->rx_bad_outer_ip_csum;
2009
2010                 if (record_core_cycles)
2011                         fwd_cycles += fs->core_cycles;
2012         }
2013         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2014                 pt_id = fwd_ports_ids[i];
2015                 port = &ports[pt_id];
2016
2017                 rte_eth_stats_get(pt_id, &stats);
2018                 stats.ipackets -= port->stats.ipackets;
2019                 stats.opackets -= port->stats.opackets;
2020                 stats.ibytes -= port->stats.ibytes;
2021                 stats.obytes -= port->stats.obytes;
2022                 stats.imissed -= port->stats.imissed;
2023                 stats.oerrors -= port->stats.oerrors;
2024                 stats.rx_nombuf -= port->stats.rx_nombuf;
2025
2026                 total_recv += stats.ipackets;
2027                 total_xmit += stats.opackets;
2028                 total_rx_dropped += stats.imissed;
2029                 total_tx_dropped += ports_stats[pt_id].tx_dropped;
2030                 total_tx_dropped += stats.oerrors;
2031                 total_rx_nombuf  += stats.rx_nombuf;
2032
2033                 printf("\n  %s Forward statistics for port %-2d %s\n",
2034                        fwd_stats_border, pt_id, fwd_stats_border);
2035
2036                 printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2037                        "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2038                        stats.ipackets + stats.imissed);
2039
2040                 if (cur_fwd_eng == &csum_fwd_engine) {
2041                         printf("  Bad-ipcsum: %-14"PRIu64
2042                                " Bad-l4csum: %-14"PRIu64
2043                                "Bad-outer-l4csum: %-14"PRIu64"\n",
2044                                ports_stats[pt_id].rx_bad_ip_csum,
2045                                ports_stats[pt_id].rx_bad_l4_csum,
2046                                ports_stats[pt_id].rx_bad_outer_l4_csum);
2047                         printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2048                                ports_stats[pt_id].rx_bad_outer_ip_csum);
2049                 }
2050                 if (stats.ierrors + stats.rx_nombuf > 0) {
2051                         printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2052                         printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2053                 }
2054
2055                 printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2056                        "TX-total: %-"PRIu64"\n",
2057                        stats.opackets, ports_stats[pt_id].tx_dropped,
2058                        stats.opackets + ports_stats[pt_id].tx_dropped);
2059
2060                 if (record_burst_stats) {
2061                         if (ports_stats[pt_id].rx_stream)
2062                                 pkt_burst_stats_display("RX",
2063                                         &ports_stats[pt_id].rx_stream->rx_burst_stats);
2064                         if (ports_stats[pt_id].tx_stream)
2065                                 pkt_burst_stats_display("TX",
2066                                 &ports_stats[pt_id].tx_stream->tx_burst_stats);
2067                 }
2068
2069                 printf("  %s--------------------------------%s\n",
2070                        fwd_stats_border, fwd_stats_border);
2071         }
2072
2073         printf("\n  %s Accumulated forward statistics for all ports"
2074                "%s\n",
2075                acc_stats_border, acc_stats_border);
2076         printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2077                "%-"PRIu64"\n"
2078                "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2079                "%-"PRIu64"\n",
2080                total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2081                total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2082         if (total_rx_nombuf > 0)
2083                 printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2084         printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2085                "%s\n",
2086                acc_stats_border, acc_stats_border);
2087         if (record_core_cycles) {
2088 #define CYC_PER_MHZ 1E6
2089                 if (total_recv > 0 || total_xmit > 0) {
2090                         uint64_t total_pkts = 0;
2091                         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2092                             strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2093                                 total_pkts = total_xmit;
2094                         else
2095                                 total_pkts = total_recv;
2096
2097                         printf("\n  CPU cycles/packet=%.2F (total cycles="
2098                                "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2099                                " MHz Clock\n",
2100                                (double) fwd_cycles / total_pkts,
2101                                fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2102                                (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2103                 }
2104         }
2105 }
2106
2107 void
2108 fwd_stats_reset(void)
2109 {
2110         streamid_t sm_id;
2111         portid_t pt_id;
2112         int i;
2113
2114         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2115                 pt_id = fwd_ports_ids[i];
2116                 rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2117         }
2118         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2119                 struct fwd_stream *fs = fwd_streams[sm_id];
2120
2121                 fs->rx_packets = 0;
2122                 fs->tx_packets = 0;
2123                 fs->fwd_dropped = 0;
2124                 fs->rx_bad_ip_csum = 0;
2125                 fs->rx_bad_l4_csum = 0;
2126                 fs->rx_bad_outer_l4_csum = 0;
2127                 fs->rx_bad_outer_ip_csum = 0;
2128
2129                 memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2130                 memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2131                 fs->core_cycles = 0;
2132         }
2133 }
2134
2135 static void
2136 flush_fwd_rx_queues(void)
2137 {
2138         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2139         portid_t  rxp;
2140         portid_t port_id;
2141         queueid_t rxq;
2142         uint16_t  nb_rx;
2143         uint16_t  i;
2144         uint8_t   j;
2145         uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2146         uint64_t timer_period;
2147
2148         if (num_procs > 1) {
2149                 printf("multi-process not support for flushing fwd Rx queues, skip the below lines and return.\n");
2150                 return;
2151         }
2152
2153         /* convert to number of cycles */
2154         timer_period = rte_get_timer_hz(); /* 1 second timeout */
2155
2156         for (j = 0; j < 2; j++) {
2157                 for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2158                         for (rxq = 0; rxq < nb_rxq; rxq++) {
2159                                 port_id = fwd_ports_ids[rxp];
2160                                 /**
2161                                 * testpmd can stuck in the below do while loop
2162                                 * if rte_eth_rx_burst() always returns nonzero
2163                                 * packets. So timer is added to exit this loop
2164                                 * after 1sec timer expiry.
2165                                 */
2166                                 prev_tsc = rte_rdtsc();
2167                                 do {
2168                                         nb_rx = rte_eth_rx_burst(port_id, rxq,
2169                                                 pkts_burst, MAX_PKT_BURST);
2170                                         for (i = 0; i < nb_rx; i++)
2171                                                 rte_pktmbuf_free(pkts_burst[i]);
2172
2173                                         cur_tsc = rte_rdtsc();
2174                                         diff_tsc = cur_tsc - prev_tsc;
2175                                         timer_tsc += diff_tsc;
2176                                 } while ((nb_rx > 0) &&
2177                                         (timer_tsc < timer_period));
2178                                 timer_tsc = 0;
2179                         }
2180                 }
2181                 rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
2182         }
2183 }
2184
2185 static void
2186 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2187 {
2188         struct fwd_stream **fsm;
2189         streamid_t nb_fs;
2190         streamid_t sm_id;
2191 #ifdef RTE_LIB_BITRATESTATS
2192         uint64_t tics_per_1sec;
2193         uint64_t tics_datum;
2194         uint64_t tics_current;
2195         uint16_t i, cnt_ports;
2196
2197         cnt_ports = nb_ports;
2198         tics_datum = rte_rdtsc();
2199         tics_per_1sec = rte_get_timer_hz();
2200 #endif
2201         fsm = &fwd_streams[fc->stream_idx];
2202         nb_fs = fc->stream_nb;
2203         do {
2204                 for (sm_id = 0; sm_id < nb_fs; sm_id++)
2205                         (*pkt_fwd)(fsm[sm_id]);
2206 #ifdef RTE_LIB_BITRATESTATS
2207                 if (bitrate_enabled != 0 &&
2208                                 bitrate_lcore_id == rte_lcore_id()) {
2209                         tics_current = rte_rdtsc();
2210                         if (tics_current - tics_datum >= tics_per_1sec) {
2211                                 /* Periodic bitrate calculation */
2212                                 for (i = 0; i < cnt_ports; i++)
2213                                         rte_stats_bitrate_calc(bitrate_data,
2214                                                 ports_ids[i]);
2215                                 tics_datum = tics_current;
2216                         }
2217                 }
2218 #endif
2219 #ifdef RTE_LIB_LATENCYSTATS
2220                 if (latencystats_enabled != 0 &&
2221                                 latencystats_lcore_id == rte_lcore_id())
2222                         rte_latencystats_update();
2223 #endif
2224
2225         } while (! fc->stopped);
2226 }
2227
2228 static int
2229 start_pkt_forward_on_core(void *fwd_arg)
2230 {
2231         run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2232                              cur_fwd_config.fwd_eng->packet_fwd);
2233         return 0;
2234 }
2235
2236 /*
2237  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2238  * Used to start communication flows in network loopback test configurations.
2239  */
2240 static int
2241 run_one_txonly_burst_on_core(void *fwd_arg)
2242 {
2243         struct fwd_lcore *fwd_lc;
2244         struct fwd_lcore tmp_lcore;
2245
2246         fwd_lc = (struct fwd_lcore *) fwd_arg;
2247         tmp_lcore = *fwd_lc;
2248         tmp_lcore.stopped = 1;
2249         run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2250         return 0;
2251 }
2252
2253 /*
2254  * Launch packet forwarding:
2255  *     - Setup per-port forwarding context.
2256  *     - launch logical cores with their forwarding configuration.
2257  */
2258 static void
2259 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2260 {
2261         unsigned int i;
2262         unsigned int lc_id;
2263         int diag;
2264
2265         for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2266                 lc_id = fwd_lcores_cpuids[i];
2267                 if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2268                         fwd_lcores[i]->stopped = 0;
2269                         diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2270                                                      fwd_lcores[i], lc_id);
2271                         if (diag != 0)
2272                                 fprintf(stderr,
2273                                         "launch lcore %u failed - diag=%d\n",
2274                                         lc_id, diag);
2275                 }
2276         }
2277 }
2278
2279 /*
2280  * Launch packet forwarding configuration.
2281  */
2282 void
2283 start_packet_forwarding(int with_tx_first)
2284 {
2285         port_fwd_begin_t port_fwd_begin;
2286         port_fwd_end_t  port_fwd_end;
2287         unsigned int i;
2288
2289         if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2290                 rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2291
2292         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2293                 rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2294
2295         if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2296                 strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2297                 (!nb_rxq || !nb_txq))
2298                 rte_exit(EXIT_FAILURE,
2299                         "Either rxq or txq are 0, cannot use %s fwd mode\n",
2300                         cur_fwd_eng->fwd_mode_name);
2301
2302         if (all_ports_started() == 0) {
2303                 fprintf(stderr, "Not all ports were started\n");
2304                 return;
2305         }
2306         if (test_done == 0) {
2307                 fprintf(stderr, "Packet forwarding already started\n");
2308                 return;
2309         }
2310
2311         fwd_config_setup();
2312
2313         pkt_fwd_config_display(&cur_fwd_config);
2314         if (!pkt_fwd_shared_rxq_check())
2315                 return;
2316
2317         port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2318         if (port_fwd_begin != NULL) {
2319                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2320                         if (port_fwd_begin(fwd_ports_ids[i])) {
2321                                 fprintf(stderr,
2322                                         "Packet forwarding is not ready\n");
2323                                 return;
2324                         }
2325                 }
2326         }
2327
2328         if (with_tx_first) {
2329                 port_fwd_begin = tx_only_engine.port_fwd_begin;
2330                 if (port_fwd_begin != NULL) {
2331                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2332                                 if (port_fwd_begin(fwd_ports_ids[i])) {
2333                                         fprintf(stderr,
2334                                                 "Packet forwarding is not ready\n");
2335                                         return;
2336                                 }
2337                         }
2338                 }
2339         }
2340
2341         test_done = 0;
2342
2343         if(!no_flush_rx)
2344                 flush_fwd_rx_queues();
2345
2346         rxtx_config_display();
2347
2348         fwd_stats_reset();
2349         if (with_tx_first) {
2350                 while (with_tx_first--) {
2351                         launch_packet_forwarding(
2352                                         run_one_txonly_burst_on_core);
2353                         rte_eal_mp_wait_lcore();
2354                 }
2355                 port_fwd_end = tx_only_engine.port_fwd_end;
2356                 if (port_fwd_end != NULL) {
2357                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2358                                 (*port_fwd_end)(fwd_ports_ids[i]);
2359                 }
2360         }
2361         launch_packet_forwarding(start_pkt_forward_on_core);
2362 }
2363
2364 void
2365 stop_packet_forwarding(void)
2366 {
2367         port_fwd_end_t port_fwd_end;
2368         lcoreid_t lc_id;
2369         portid_t pt_id;
2370         int i;
2371
2372         if (test_done) {
2373                 fprintf(stderr, "Packet forwarding not started\n");
2374                 return;
2375         }
2376         printf("Telling cores to stop...");
2377         for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2378                 fwd_lcores[lc_id]->stopped = 1;
2379         printf("\nWaiting for lcores to finish...\n");
2380         rte_eal_mp_wait_lcore();
2381         port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2382         if (port_fwd_end != NULL) {
2383                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2384                         pt_id = fwd_ports_ids[i];
2385                         (*port_fwd_end)(pt_id);
2386                 }
2387         }
2388
2389         fwd_stats_display();
2390
2391         printf("\nDone.\n");
2392         test_done = 1;
2393 }
2394
2395 void
2396 dev_set_link_up(portid_t pid)
2397 {
2398         if (rte_eth_dev_set_link_up(pid) < 0)
2399                 fprintf(stderr, "\nSet link up fail.\n");
2400 }
2401
2402 void
2403 dev_set_link_down(portid_t pid)
2404 {
2405         if (rte_eth_dev_set_link_down(pid) < 0)
2406                 fprintf(stderr, "\nSet link down fail.\n");
2407 }
2408
2409 static int
2410 all_ports_started(void)
2411 {
2412         portid_t pi;
2413         struct rte_port *port;
2414
2415         RTE_ETH_FOREACH_DEV(pi) {
2416                 port = &ports[pi];
2417                 /* Check if there is a port which is not started */
2418                 if ((port->port_status != RTE_PORT_STARTED) &&
2419                         (port->slave_flag == 0))
2420                         return 0;
2421         }
2422
2423         /* No port is not started */
2424         return 1;
2425 }
2426
2427 int
2428 port_is_stopped(portid_t port_id)
2429 {
2430         struct rte_port *port = &ports[port_id];
2431
2432         if ((port->port_status != RTE_PORT_STOPPED) &&
2433             (port->slave_flag == 0))
2434                 return 0;
2435         return 1;
2436 }
2437
2438 int
2439 all_ports_stopped(void)
2440 {
2441         portid_t pi;
2442
2443         RTE_ETH_FOREACH_DEV(pi) {
2444                 if (!port_is_stopped(pi))
2445                         return 0;
2446         }
2447
2448         return 1;
2449 }
2450
2451 int
2452 port_is_started(portid_t port_id)
2453 {
2454         if (port_id_is_invalid(port_id, ENABLED_WARN))
2455                 return 0;
2456
2457         if (ports[port_id].port_status != RTE_PORT_STARTED)
2458                 return 0;
2459
2460         return 1;
2461 }
2462
2463 /* Configure the Rx and Tx hairpin queues for the selected port. */
2464 static int
2465 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2466 {
2467         queueid_t qi;
2468         struct rte_eth_hairpin_conf hairpin_conf = {
2469                 .peer_count = 1,
2470         };
2471         int i;
2472         int diag;
2473         struct rte_port *port = &ports[pi];
2474         uint16_t peer_rx_port = pi;
2475         uint16_t peer_tx_port = pi;
2476         uint32_t manual = 1;
2477         uint32_t tx_exp = hairpin_mode & 0x10;
2478
2479         if (!(hairpin_mode & 0xf)) {
2480                 peer_rx_port = pi;
2481                 peer_tx_port = pi;
2482                 manual = 0;
2483         } else if (hairpin_mode & 0x1) {
2484                 peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2485                                                        RTE_ETH_DEV_NO_OWNER);
2486                 if (peer_tx_port >= RTE_MAX_ETHPORTS)
2487                         peer_tx_port = rte_eth_find_next_owned_by(0,
2488                                                 RTE_ETH_DEV_NO_OWNER);
2489                 if (p_pi != RTE_MAX_ETHPORTS) {
2490                         peer_rx_port = p_pi;
2491                 } else {
2492                         uint16_t next_pi;
2493
2494                         /* Last port will be the peer RX port of the first. */
2495                         RTE_ETH_FOREACH_DEV(next_pi)
2496                                 peer_rx_port = next_pi;
2497                 }
2498                 manual = 1;
2499         } else if (hairpin_mode & 0x2) {
2500                 if (cnt_pi & 0x1) {
2501                         peer_rx_port = p_pi;
2502                 } else {
2503                         peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2504                                                 RTE_ETH_DEV_NO_OWNER);
2505                         if (peer_rx_port >= RTE_MAX_ETHPORTS)
2506                                 peer_rx_port = pi;
2507                 }
2508                 peer_tx_port = peer_rx_port;
2509                 manual = 1;
2510         }
2511
2512         for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2513                 hairpin_conf.peers[0].port = peer_rx_port;
2514                 hairpin_conf.peers[0].queue = i + nb_rxq;
2515                 hairpin_conf.manual_bind = !!manual;
2516                 hairpin_conf.tx_explicit = !!tx_exp;
2517                 diag = rte_eth_tx_hairpin_queue_setup
2518                         (pi, qi, nb_txd, &hairpin_conf);
2519                 i++;
2520                 if (diag == 0)
2521                         continue;
2522
2523                 /* Fail to setup rx queue, return */
2524                 if (rte_atomic16_cmpset(&(port->port_status),
2525                                         RTE_PORT_HANDLING,
2526                                         RTE_PORT_STOPPED) == 0)
2527                         fprintf(stderr,
2528                                 "Port %d can not be set back to stopped\n", pi);
2529                 fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2530                         pi);
2531                 /* try to reconfigure queues next time */
2532                 port->need_reconfig_queues = 1;
2533                 return -1;
2534         }
2535         for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2536                 hairpin_conf.peers[0].port = peer_tx_port;
2537                 hairpin_conf.peers[0].queue = i + nb_txq;
2538                 hairpin_conf.manual_bind = !!manual;
2539                 hairpin_conf.tx_explicit = !!tx_exp;
2540                 diag = rte_eth_rx_hairpin_queue_setup
2541                         (pi, qi, nb_rxd, &hairpin_conf);
2542                 i++;
2543                 if (diag == 0)
2544                         continue;
2545
2546                 /* Fail to setup rx queue, return */
2547                 if (rte_atomic16_cmpset(&(port->port_status),
2548                                         RTE_PORT_HANDLING,
2549                                         RTE_PORT_STOPPED) == 0)
2550                         fprintf(stderr,
2551                                 "Port %d can not be set back to stopped\n", pi);
2552                 fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2553                         pi);
2554                 /* try to reconfigure queues next time */
2555                 port->need_reconfig_queues = 1;
2556                 return -1;
2557         }
2558         return 0;
2559 }
2560
2561 /* Configure the Rx with optional split. */
2562 int
2563 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2564                uint16_t nb_rx_desc, unsigned int socket_id,
2565                struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2566 {
2567         union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2568         unsigned int i, mp_n;
2569         int ret;
2570
2571         if (rx_pkt_nb_segs <= 1 ||
2572             (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2573                 rx_conf->rx_seg = NULL;
2574                 rx_conf->rx_nseg = 0;
2575                 ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2576                                              nb_rx_desc, socket_id,
2577                                              rx_conf, mp);
2578                 return ret;
2579         }
2580         for (i = 0; i < rx_pkt_nb_segs; i++) {
2581                 struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2582                 struct rte_mempool *mpx;
2583                 /*
2584                  * Use last valid pool for the segments with number
2585                  * exceeding the pool index.
2586                  */
2587                 mp_n = (i > mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2588                 mpx = mbuf_pool_find(socket_id, mp_n);
2589                 /* Handle zero as mbuf data buffer size. */
2590                 rx_seg->length = rx_pkt_seg_lengths[i] ?
2591                                    rx_pkt_seg_lengths[i] :
2592                                    mbuf_data_size[mp_n];
2593                 rx_seg->offset = i < rx_pkt_nb_offs ?
2594                                    rx_pkt_seg_offsets[i] : 0;
2595                 rx_seg->mp = mpx ? mpx : mp;
2596         }
2597         rx_conf->rx_nseg = rx_pkt_nb_segs;
2598         rx_conf->rx_seg = rx_useg;
2599         ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2600                                     socket_id, rx_conf, NULL);
2601         rx_conf->rx_seg = NULL;
2602         rx_conf->rx_nseg = 0;
2603         return ret;
2604 }
2605
2606 static int
2607 alloc_xstats_display_info(portid_t pi)
2608 {
2609         uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2610         uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2611         uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2612
2613         if (xstats_display_num == 0)
2614                 return 0;
2615
2616         *ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2617         if (*ids_supp == NULL)
2618                 goto fail_ids_supp;
2619
2620         *prev_values = calloc(xstats_display_num,
2621                               sizeof(**prev_values));
2622         if (*prev_values == NULL)
2623                 goto fail_prev_values;
2624
2625         *curr_values = calloc(xstats_display_num,
2626                               sizeof(**curr_values));
2627         if (*curr_values == NULL)
2628                 goto fail_curr_values;
2629
2630         ports[pi].xstats_info.allocated = true;
2631
2632         return 0;
2633
2634 fail_curr_values:
2635         free(*prev_values);
2636 fail_prev_values:
2637         free(*ids_supp);
2638 fail_ids_supp:
2639         return -ENOMEM;
2640 }
2641
2642 static void
2643 free_xstats_display_info(portid_t pi)
2644 {
2645         if (!ports[pi].xstats_info.allocated)
2646                 return;
2647         free(ports[pi].xstats_info.ids_supp);
2648         free(ports[pi].xstats_info.prev_values);
2649         free(ports[pi].xstats_info.curr_values);
2650         ports[pi].xstats_info.allocated = false;
2651 }
2652
2653 /** Fill helper structures for specified port to show extended statistics. */
2654 static void
2655 fill_xstats_display_info_for_port(portid_t pi)
2656 {
2657         unsigned int stat, stat_supp;
2658         const char *xstat_name;
2659         struct rte_port *port;
2660         uint64_t *ids_supp;
2661         int rc;
2662
2663         if (xstats_display_num == 0)
2664                 return;
2665
2666         if (pi == (portid_t)RTE_PORT_ALL) {
2667                 fill_xstats_display_info();
2668                 return;
2669         }
2670
2671         port = &ports[pi];
2672         if (port->port_status != RTE_PORT_STARTED)
2673                 return;
2674
2675         if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2676                 rte_exit(EXIT_FAILURE,
2677                          "Failed to allocate xstats display memory\n");
2678
2679         ids_supp = port->xstats_info.ids_supp;
2680         for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2681                 xstat_name = xstats_display[stat].name;
2682                 rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2683                                                    ids_supp + stat_supp);
2684                 if (rc != 0) {
2685                         fprintf(stderr, "No xstat '%s' on port %u - skip it %u\n",
2686                                 xstat_name, pi, stat);
2687                         continue;
2688                 }
2689                 stat_supp++;
2690         }
2691
2692         port->xstats_info.ids_supp_sz = stat_supp;
2693 }
2694
2695 /** Fill helper structures for all ports to show extended statistics. */
2696 static void
2697 fill_xstats_display_info(void)
2698 {
2699         portid_t pi;
2700
2701         if (xstats_display_num == 0)
2702                 return;
2703
2704         RTE_ETH_FOREACH_DEV(pi)
2705                 fill_xstats_display_info_for_port(pi);
2706 }
2707
2708 int
2709 start_port(portid_t pid)
2710 {
2711         int diag, need_check_link_status = -1;
2712         portid_t pi;
2713         portid_t p_pi = RTE_MAX_ETHPORTS;
2714         portid_t pl[RTE_MAX_ETHPORTS];
2715         portid_t peer_pl[RTE_MAX_ETHPORTS];
2716         uint16_t cnt_pi = 0;
2717         uint16_t cfg_pi = 0;
2718         int peer_pi;
2719         queueid_t qi;
2720         struct rte_port *port;
2721         struct rte_eth_hairpin_cap cap;
2722
2723         if (port_id_is_invalid(pid, ENABLED_WARN))
2724                 return 0;
2725
2726         RTE_ETH_FOREACH_DEV(pi) {
2727                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2728                         continue;
2729
2730                 need_check_link_status = 0;
2731                 port = &ports[pi];
2732                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2733                                                  RTE_PORT_HANDLING) == 0) {
2734                         fprintf(stderr, "Port %d is now not stopped\n", pi);
2735                         continue;
2736                 }
2737
2738                 if (port->need_reconfig > 0) {
2739                         struct rte_eth_conf dev_conf;
2740                         int k;
2741
2742                         port->need_reconfig = 0;
2743
2744                         if (flow_isolate_all) {
2745                                 int ret = port_flow_isolate(pi, 1);
2746                                 if (ret) {
2747                                         fprintf(stderr,
2748                                                 "Failed to apply isolated mode on port %d\n",
2749                                                 pi);
2750                                         return -1;
2751                                 }
2752                         }
2753                         configure_rxtx_dump_callbacks(0);
2754                         printf("Configuring Port %d (socket %u)\n", pi,
2755                                         port->socket_id);
2756                         if (nb_hairpinq > 0 &&
2757                             rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2758                                 fprintf(stderr,
2759                                         "Port %d doesn't support hairpin queues\n",
2760                                         pi);
2761                                 return -1;
2762                         }
2763
2764                         /* configure port */
2765                         diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2766                                                      nb_txq + nb_hairpinq,
2767                                                      &(port->dev_conf));
2768                         if (diag != 0) {
2769                                 if (rte_atomic16_cmpset(&(port->port_status),
2770                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2771                                         fprintf(stderr,
2772                                                 "Port %d can not be set back to stopped\n",
2773                                                 pi);
2774                                 fprintf(stderr, "Fail to configure port %d\n",
2775                                         pi);
2776                                 /* try to reconfigure port next time */
2777                                 port->need_reconfig = 1;
2778                                 return -1;
2779                         }
2780                         /* get device configuration*/
2781                         if (0 !=
2782                                 eth_dev_conf_get_print_err(pi, &dev_conf)) {
2783                                 fprintf(stderr,
2784                                         "port %d can not get device configuration\n",
2785                                         pi);
2786                                 return -1;
2787                         }
2788                         /* Apply Rx offloads configuration */
2789                         if (dev_conf.rxmode.offloads !=
2790                             port->dev_conf.rxmode.offloads) {
2791                                 port->dev_conf.rxmode.offloads |=
2792                                         dev_conf.rxmode.offloads;
2793                                 for (k = 0;
2794                                      k < port->dev_info.max_rx_queues;
2795                                      k++)
2796                                         port->rx_conf[k].offloads |=
2797                                                 dev_conf.rxmode.offloads;
2798                         }
2799                         /* Apply Tx offloads configuration */
2800                         if (dev_conf.txmode.offloads !=
2801                             port->dev_conf.txmode.offloads) {
2802                                 port->dev_conf.txmode.offloads |=
2803                                         dev_conf.txmode.offloads;
2804                                 for (k = 0;
2805                                      k < port->dev_info.max_tx_queues;
2806                                      k++)
2807                                         port->tx_conf[k].offloads |=
2808                                                 dev_conf.txmode.offloads;
2809                         }
2810                 }
2811                 if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2812                         port->need_reconfig_queues = 0;
2813                         /* setup tx queues */
2814                         for (qi = 0; qi < nb_txq; qi++) {
2815                                 if ((numa_support) &&
2816                                         (txring_numa[pi] != NUMA_NO_CONFIG))
2817                                         diag = rte_eth_tx_queue_setup(pi, qi,
2818                                                 port->nb_tx_desc[qi],
2819                                                 txring_numa[pi],
2820                                                 &(port->tx_conf[qi]));
2821                                 else
2822                                         diag = rte_eth_tx_queue_setup(pi, qi,
2823                                                 port->nb_tx_desc[qi],
2824                                                 port->socket_id,
2825                                                 &(port->tx_conf[qi]));
2826
2827                                 if (diag == 0)
2828                                         continue;
2829
2830                                 /* Fail to setup tx queue, return */
2831                                 if (rte_atomic16_cmpset(&(port->port_status),
2832                                                         RTE_PORT_HANDLING,
2833                                                         RTE_PORT_STOPPED) == 0)
2834                                         fprintf(stderr,
2835                                                 "Port %d can not be set back to stopped\n",
2836                                                 pi);
2837                                 fprintf(stderr,
2838                                         "Fail to configure port %d tx queues\n",
2839                                         pi);
2840                                 /* try to reconfigure queues next time */
2841                                 port->need_reconfig_queues = 1;
2842                                 return -1;
2843                         }
2844                         for (qi = 0; qi < nb_rxq; qi++) {
2845                                 /* setup rx queues */
2846                                 if ((numa_support) &&
2847                                         (rxring_numa[pi] != NUMA_NO_CONFIG)) {
2848                                         struct rte_mempool * mp =
2849                                                 mbuf_pool_find
2850                                                         (rxring_numa[pi], 0);
2851                                         if (mp == NULL) {
2852                                                 fprintf(stderr,
2853                                                         "Failed to setup RX queue: No mempool allocation on the socket %d\n",
2854                                                         rxring_numa[pi]);
2855                                                 return -1;
2856                                         }
2857
2858                                         diag = rx_queue_setup(pi, qi,
2859                                              port->nb_rx_desc[qi],
2860                                              rxring_numa[pi],
2861                                              &(port->rx_conf[qi]),
2862                                              mp);
2863                                 } else {
2864                                         struct rte_mempool *mp =
2865                                                 mbuf_pool_find
2866                                                         (port->socket_id, 0);
2867                                         if (mp == NULL) {
2868                                                 fprintf(stderr,
2869                                                         "Failed to setup RX queue: No mempool allocation on the socket %d\n",
2870                                                         port->socket_id);
2871                                                 return -1;
2872                                         }
2873                                         diag = rx_queue_setup(pi, qi,
2874                                              port->nb_rx_desc[qi],
2875                                              port->socket_id,
2876                                              &(port->rx_conf[qi]),
2877                                              mp);
2878                                 }
2879                                 if (diag == 0)
2880                                         continue;
2881
2882                                 /* Fail to setup rx queue, return */
2883                                 if (rte_atomic16_cmpset(&(port->port_status),
2884                                                         RTE_PORT_HANDLING,
2885                                                         RTE_PORT_STOPPED) == 0)
2886                                         fprintf(stderr,
2887                                                 "Port %d can not be set back to stopped\n",
2888                                                 pi);
2889                                 fprintf(stderr,
2890                                         "Fail to configure port %d rx queues\n",
2891                                         pi);
2892                                 /* try to reconfigure queues next time */
2893                                 port->need_reconfig_queues = 1;
2894                                 return -1;
2895                         }
2896                         /* setup hairpin queues */
2897                         if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2898                                 return -1;
2899                 }
2900                 configure_rxtx_dump_callbacks(verbose_level);
2901                 if (clear_ptypes) {
2902                         diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2903                                         NULL, 0);
2904                         if (diag < 0)
2905                                 fprintf(stderr,
2906                                         "Port %d: Failed to disable Ptype parsing\n",
2907                                         pi);
2908                 }
2909
2910                 p_pi = pi;
2911                 cnt_pi++;
2912
2913                 /* start port */
2914                 diag = eth_dev_start_mp(pi);
2915                 if (diag < 0) {
2916                         fprintf(stderr, "Fail to start port %d: %s\n",
2917                                 pi, rte_strerror(-diag));
2918
2919                         /* Fail to setup rx queue, return */
2920                         if (rte_atomic16_cmpset(&(port->port_status),
2921                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2922                                 fprintf(stderr,
2923                                         "Port %d can not be set back to stopped\n",
2924                                         pi);
2925                         continue;
2926                 }
2927
2928                 if (rte_atomic16_cmpset(&(port->port_status),
2929                         RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2930                         fprintf(stderr, "Port %d can not be set into started\n",
2931                                 pi);
2932
2933                 if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2934                         printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2935                                         RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2936
2937                 /* at least one port started, need checking link status */
2938                 need_check_link_status = 1;
2939
2940                 pl[cfg_pi++] = pi;
2941         }
2942
2943         if (need_check_link_status == 1 && !no_link_check)
2944                 check_all_ports_link_status(RTE_PORT_ALL);
2945         else if (need_check_link_status == 0)
2946                 fprintf(stderr, "Please stop the ports first\n");
2947
2948         if (hairpin_mode & 0xf) {
2949                 uint16_t i;
2950                 int j;
2951
2952                 /* bind all started hairpin ports */
2953                 for (i = 0; i < cfg_pi; i++) {
2954                         pi = pl[i];
2955                         /* bind current Tx to all peer Rx */
2956                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2957                                                         RTE_MAX_ETHPORTS, 1);
2958                         if (peer_pi < 0)
2959                                 return peer_pi;
2960                         for (j = 0; j < peer_pi; j++) {
2961                                 if (!port_is_started(peer_pl[j]))
2962                                         continue;
2963                                 diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2964                                 if (diag < 0) {
2965                                         fprintf(stderr,
2966                                                 "Error during binding hairpin Tx port %u to %u: %s\n",
2967                                                 pi, peer_pl[j],
2968                                                 rte_strerror(-diag));
2969                                         return -1;
2970                                 }
2971                         }
2972                         /* bind all peer Tx to current Rx */
2973                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2974                                                         RTE_MAX_ETHPORTS, 0);
2975                         if (peer_pi < 0)
2976                                 return peer_pi;
2977                         for (j = 0; j < peer_pi; j++) {
2978                                 if (!port_is_started(peer_pl[j]))
2979                                         continue;
2980                                 diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2981                                 if (diag < 0) {
2982                                         fprintf(stderr,
2983                                                 "Error during binding hairpin Tx port %u to %u: %s\n",
2984                                                 peer_pl[j], pi,
2985                                                 rte_strerror(-diag));
2986                                         return -1;
2987                                 }
2988                         }
2989                 }
2990         }
2991
2992         fill_xstats_display_info_for_port(pid);
2993
2994         printf("Done\n");
2995         return 0;
2996 }
2997
2998 void
2999 stop_port(portid_t pid)
3000 {
3001         portid_t pi;
3002         struct rte_port *port;
3003         int need_check_link_status = 0;
3004         portid_t peer_pl[RTE_MAX_ETHPORTS];
3005         int peer_pi;
3006
3007         if (port_id_is_invalid(pid, ENABLED_WARN))
3008                 return;
3009
3010         printf("Stopping ports...\n");
3011
3012         RTE_ETH_FOREACH_DEV(pi) {
3013                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3014                         continue;
3015
3016                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3017                         fprintf(stderr,
3018                                 "Please remove port %d from forwarding configuration.\n",
3019                                 pi);
3020                         continue;
3021                 }
3022
3023                 if (port_is_bonding_slave(pi)) {
3024                         fprintf(stderr,
3025                                 "Please remove port %d from bonded device.\n",
3026                                 pi);
3027                         continue;
3028                 }
3029
3030                 port = &ports[pi];
3031                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
3032                                                 RTE_PORT_HANDLING) == 0)
3033                         continue;
3034
3035                 if (hairpin_mode & 0xf) {
3036                         int j;
3037
3038                         rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3039                         /* unbind all peer Tx from current Rx */
3040                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3041                                                         RTE_MAX_ETHPORTS, 0);
3042                         if (peer_pi < 0)
3043                                 continue;
3044                         for (j = 0; j < peer_pi; j++) {
3045                                 if (!port_is_started(peer_pl[j]))
3046                                         continue;
3047                                 rte_eth_hairpin_unbind(peer_pl[j], pi);
3048                         }
3049                 }
3050
3051                 if (port->flow_list)
3052                         port_flow_flush(pi);
3053
3054                 if (eth_dev_stop_mp(pi) != 0)
3055                         RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3056                                 pi);
3057
3058                 if (rte_atomic16_cmpset(&(port->port_status),
3059                         RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
3060                         fprintf(stderr, "Port %d can not be set into stopped\n",
3061                                 pi);
3062                 need_check_link_status = 1;
3063         }
3064         if (need_check_link_status && !no_link_check)
3065                 check_all_ports_link_status(RTE_PORT_ALL);
3066
3067         printf("Done\n");
3068 }
3069
3070 static void
3071 remove_invalid_ports_in(portid_t *array, portid_t *total)
3072 {
3073         portid_t i;
3074         portid_t new_total = 0;
3075
3076         for (i = 0; i < *total; i++)
3077                 if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3078                         array[new_total] = array[i];
3079                         new_total++;
3080                 }
3081         *total = new_total;
3082 }
3083
3084 static void
3085 remove_invalid_ports(void)
3086 {
3087         remove_invalid_ports_in(ports_ids, &nb_ports);
3088         remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3089         nb_cfg_ports = nb_fwd_ports;
3090 }
3091
3092 void
3093 close_port(portid_t pid)
3094 {
3095         portid_t pi;
3096         struct rte_port *port;
3097
3098         if (port_id_is_invalid(pid, ENABLED_WARN))
3099                 return;
3100
3101         printf("Closing ports...\n");
3102
3103         RTE_ETH_FOREACH_DEV(pi) {
3104                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3105                         continue;
3106
3107                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3108                         fprintf(stderr,
3109                                 "Please remove port %d from forwarding configuration.\n",
3110                                 pi);
3111                         continue;
3112                 }
3113
3114                 if (port_is_bonding_slave(pi)) {
3115                         fprintf(stderr,
3116                                 "Please remove port %d from bonded device.\n",
3117                                 pi);
3118                         continue;
3119                 }
3120
3121                 port = &ports[pi];
3122                 if (rte_atomic16_cmpset(&(port->port_status),
3123                         RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
3124                         fprintf(stderr, "Port %d is already closed\n", pi);
3125                         continue;
3126                 }
3127
3128                 if (is_proc_primary()) {
3129                         port_flow_flush(pi);
3130                         port_flex_item_flush(pi);
3131                         rte_eth_dev_close(pi);
3132                 }
3133
3134                 free_xstats_display_info(pi);
3135         }
3136
3137         remove_invalid_ports();
3138         printf("Done\n");
3139 }
3140
3141 void
3142 reset_port(portid_t pid)
3143 {
3144         int diag;
3145         portid_t pi;
3146         struct rte_port *port;
3147
3148         if (port_id_is_invalid(pid, ENABLED_WARN))
3149                 return;
3150
3151         if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3152                 (pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3153                 fprintf(stderr,
3154                         "Can not reset port(s), please stop port(s) first.\n");
3155                 return;
3156         }
3157
3158         printf("Resetting ports...\n");
3159
3160         RTE_ETH_FOREACH_DEV(pi) {
3161                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3162                         continue;
3163
3164                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3165                         fprintf(stderr,
3166                                 "Please remove port %d from forwarding configuration.\n",
3167                                 pi);
3168                         continue;
3169                 }
3170
3171                 if (port_is_bonding_slave(pi)) {
3172                         fprintf(stderr,
3173                                 "Please remove port %d from bonded device.\n",
3174                                 pi);
3175                         continue;
3176                 }
3177
3178                 diag = rte_eth_dev_reset(pi);
3179                 if (diag == 0) {
3180                         port = &ports[pi];
3181                         port->need_reconfig = 1;
3182                         port->need_reconfig_queues = 1;
3183                 } else {
3184                         fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3185                                 pi, diag);
3186                 }
3187         }
3188
3189         printf("Done\n");
3190 }
3191
3192 void
3193 attach_port(char *identifier)
3194 {
3195         portid_t pi;
3196         struct rte_dev_iterator iterator;
3197
3198         printf("Attaching a new port...\n");
3199
3200         if (identifier == NULL) {
3201                 fprintf(stderr, "Invalid parameters are specified\n");
3202                 return;
3203         }
3204
3205         if (rte_dev_probe(identifier) < 0) {
3206                 TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3207                 return;
3208         }
3209
3210         /* first attach mode: event */
3211         if (setup_on_probe_event) {
3212                 /* new ports are detected on RTE_ETH_EVENT_NEW event */
3213                 for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3214                         if (ports[pi].port_status == RTE_PORT_HANDLING &&
3215                                         ports[pi].need_setup != 0)
3216                                 setup_attached_port(pi);
3217                 return;
3218         }
3219
3220         /* second attach mode: iterator */
3221         RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3222                 /* setup ports matching the devargs used for probing */
3223                 if (port_is_forwarding(pi))
3224                         continue; /* port was already attached before */
3225                 setup_attached_port(pi);
3226         }
3227 }
3228
3229 static void
3230 setup_attached_port(portid_t pi)
3231 {
3232         unsigned int socket_id;
3233         int ret;
3234
3235         socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3236         /* if socket_id is invalid, set to the first available socket. */
3237         if (check_socket_id(socket_id) < 0)
3238                 socket_id = socket_ids[0];
3239         reconfig(pi, socket_id);
3240         ret = rte_eth_promiscuous_enable(pi);
3241         if (ret != 0)
3242                 fprintf(stderr,
3243                         "Error during enabling promiscuous mode for port %u: %s - ignore\n",
3244                         pi, rte_strerror(-ret));
3245
3246         ports_ids[nb_ports++] = pi;
3247         fwd_ports_ids[nb_fwd_ports++] = pi;
3248         nb_cfg_ports = nb_fwd_ports;
3249         ports[pi].need_setup = 0;
3250         ports[pi].port_status = RTE_PORT_STOPPED;
3251
3252         printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3253         printf("Done\n");
3254 }
3255
3256 static void
3257 detach_device(struct rte_device *dev)
3258 {
3259         portid_t sibling;
3260
3261         if (dev == NULL) {
3262                 fprintf(stderr, "Device already removed\n");
3263                 return;
3264         }
3265
3266         printf("Removing a device...\n");
3267
3268         RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3269                 if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3270                         if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3271                                 fprintf(stderr, "Port %u not stopped\n",
3272                                         sibling);
3273                                 return;
3274                         }
3275                         port_flow_flush(sibling);
3276                 }
3277         }
3278
3279         if (rte_dev_remove(dev) < 0) {
3280                 TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3281                 return;
3282         }
3283         remove_invalid_ports();
3284
3285         printf("Device is detached\n");
3286         printf("Now total ports is %d\n", nb_ports);
3287         printf("Done\n");
3288         return;
3289 }
3290
3291 void
3292 detach_port_device(portid_t port_id)
3293 {
3294         int ret;
3295         struct rte_eth_dev_info dev_info;
3296
3297         if (port_id_is_invalid(port_id, ENABLED_WARN))
3298                 return;
3299
3300         if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3301                 if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3302                         fprintf(stderr, "Port not stopped\n");
3303                         return;
3304                 }
3305                 fprintf(stderr, "Port was not closed\n");
3306         }
3307
3308         ret = eth_dev_info_get_print_err(port_id, &dev_info);
3309         if (ret != 0) {
3310                 TESTPMD_LOG(ERR,
3311                         "Failed to get device info for port %d, not detaching\n",
3312                         port_id);
3313                 return;
3314         }
3315         detach_device(dev_info.device);
3316 }
3317
3318 void
3319 detach_devargs(char *identifier)
3320 {
3321         struct rte_dev_iterator iterator;
3322         struct rte_devargs da;
3323         portid_t port_id;
3324
3325         printf("Removing a device...\n");
3326
3327         memset(&da, 0, sizeof(da));
3328         if (rte_devargs_parsef(&da, "%s", identifier)) {
3329                 fprintf(stderr, "cannot parse identifier\n");
3330                 return;
3331         }
3332
3333         RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3334                 if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3335                         if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3336                                 fprintf(stderr, "Port %u not stopped\n",
3337                                         port_id);
3338                                 rte_eth_iterator_cleanup(&iterator);
3339                                 rte_devargs_reset(&da);
3340                                 return;
3341                         }
3342                         port_flow_flush(port_id);
3343                 }
3344         }
3345
3346         if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3347                 TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3348                             da.name, da.bus->name);
3349                 rte_devargs_reset(&da);
3350                 return;
3351         }
3352
3353         remove_invalid_ports();
3354
3355         printf("Device %s is detached\n", identifier);
3356         printf("Now total ports is %d\n", nb_ports);
3357         printf("Done\n");
3358         rte_devargs_reset(&da);
3359 }
3360
3361 void
3362 pmd_test_exit(void)
3363 {
3364         portid_t pt_id;
3365         unsigned int i;
3366         int ret;
3367
3368         if (test_done == 0)
3369                 stop_packet_forwarding();
3370
3371 #ifndef RTE_EXEC_ENV_WINDOWS
3372         for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3373                 if (mempools[i]) {
3374                         if (mp_alloc_type == MP_ALLOC_ANON)
3375                                 rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3376                                                      NULL);
3377                 }
3378         }
3379 #endif
3380         if (ports != NULL) {
3381                 no_link_check = 1;
3382                 RTE_ETH_FOREACH_DEV(pt_id) {
3383                         printf("\nStopping port %d...\n", pt_id);
3384                         fflush(stdout);
3385                         stop_port(pt_id);
3386                 }
3387                 RTE_ETH_FOREACH_DEV(pt_id) {
3388                         printf("\nShutting down port %d...\n", pt_id);
3389                         fflush(stdout);
3390                         close_port(pt_id);
3391                 }
3392         }
3393
3394         if (hot_plug) {
3395                 ret = rte_dev_event_monitor_stop();
3396                 if (ret) {
3397                         RTE_LOG(ERR, EAL,
3398                                 "fail to stop device event monitor.");
3399                         return;
3400                 }
3401
3402                 ret = rte_dev_event_callback_unregister(NULL,
3403                         dev_event_callback, NULL);
3404                 if (ret < 0) {
3405                         RTE_LOG(ERR, EAL,
3406                                 "fail to unregister device event callback.\n");
3407                         return;
3408                 }
3409
3410                 ret = rte_dev_hotplug_handle_disable();
3411                 if (ret) {
3412                         RTE_LOG(ERR, EAL,
3413                                 "fail to disable hotplug handling.\n");
3414                         return;
3415                 }
3416         }
3417         for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3418                 if (mempools[i])
3419                         mempool_free_mp(mempools[i]);
3420         }
3421         free(xstats_display);
3422
3423         printf("\nBye...\n");
3424 }
3425
3426 typedef void (*cmd_func_t)(void);
3427 struct pmd_test_command {
3428         const char *cmd_name;
3429         cmd_func_t cmd_func;
3430 };
3431
3432 /* Check the link status of all ports in up to 9s, and print them finally */
3433 static void
3434 check_all_ports_link_status(uint32_t port_mask)
3435 {
3436 #define CHECK_INTERVAL 100 /* 100ms */
3437 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3438         portid_t portid;
3439         uint8_t count, all_ports_up, print_flag = 0;
3440         struct rte_eth_link link;
3441         int ret;
3442         char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3443
3444         printf("Checking link statuses...\n");
3445         fflush(stdout);
3446         for (count = 0; count <= MAX_CHECK_TIME; count++) {
3447                 all_ports_up = 1;
3448                 RTE_ETH_FOREACH_DEV(portid) {
3449                         if ((port_mask & (1 << portid)) == 0)
3450                                 continue;
3451                         memset(&link, 0, sizeof(link));
3452                         ret = rte_eth_link_get_nowait(portid, &link);
3453                         if (ret < 0) {
3454                                 all_ports_up = 0;
3455                                 if (print_flag == 1)
3456                                         fprintf(stderr,
3457                                                 "Port %u link get failed: %s\n",
3458                                                 portid, rte_strerror(-ret));
3459                                 continue;
3460                         }
3461                         /* print link status if flag set */
3462                         if (print_flag == 1) {
3463                                 rte_eth_link_to_str(link_status,
3464                                         sizeof(link_status), &link);
3465                                 printf("Port %d %s\n", portid, link_status);
3466                                 continue;
3467                         }
3468                         /* clear all_ports_up flag if any link down */
3469                         if (link.link_status == ETH_LINK_DOWN) {
3470                                 all_ports_up = 0;
3471                                 break;
3472                         }
3473                 }
3474                 /* after finally printing all link status, get out */
3475                 if (print_flag == 1)
3476                         break;
3477
3478                 if (all_ports_up == 0) {
3479                         fflush(stdout);
3480                         rte_delay_ms(CHECK_INTERVAL);
3481                 }
3482
3483                 /* set the print_flag if all ports up or timeout */
3484                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3485                         print_flag = 1;
3486                 }
3487
3488                 if (lsc_interrupt)
3489                         break;
3490         }
3491 }
3492
3493 static void
3494 rmv_port_callback(void *arg)
3495 {
3496         int need_to_start = 0;
3497         int org_no_link_check = no_link_check;
3498         portid_t port_id = (intptr_t)arg;
3499         struct rte_eth_dev_info dev_info;
3500         int ret;
3501
3502         RTE_ETH_VALID_PORTID_OR_RET(port_id);
3503
3504         if (!test_done && port_is_forwarding(port_id)) {
3505                 need_to_start = 1;
3506                 stop_packet_forwarding();
3507         }
3508         no_link_check = 1;
3509         stop_port(port_id);
3510         no_link_check = org_no_link_check;
3511
3512         ret = eth_dev_info_get_print_err(port_id, &dev_info);
3513         if (ret != 0)
3514                 TESTPMD_LOG(ERR,
3515                         "Failed to get device info for port %d, not detaching\n",
3516                         port_id);
3517         else {
3518                 struct rte_device *device = dev_info.device;
3519                 close_port(port_id);
3520                 detach_device(device); /* might be already removed or have more ports */
3521         }
3522         if (need_to_start)
3523                 start_packet_forwarding(0);
3524 }
3525
3526 /* This function is used by the interrupt thread */
3527 static int
3528 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3529                   void *ret_param)
3530 {
3531         RTE_SET_USED(param);
3532         RTE_SET_USED(ret_param);
3533
3534         if (type >= RTE_ETH_EVENT_MAX) {
3535                 fprintf(stderr,
3536                         "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3537                         port_id, __func__, type);
3538                 fflush(stderr);
3539         } else if (event_print_mask & (UINT32_C(1) << type)) {
3540                 printf("\nPort %" PRIu16 ": %s event\n", port_id,
3541                         eth_event_desc[type]);
3542                 fflush(stdout);
3543         }
3544
3545         switch (type) {
3546         case RTE_ETH_EVENT_NEW:
3547                 ports[port_id].need_setup = 1;
3548                 ports[port_id].port_status = RTE_PORT_HANDLING;
3549                 break;
3550         case RTE_ETH_EVENT_INTR_RMV:
3551                 if (port_id_is_invalid(port_id, DISABLED_WARN))
3552                         break;
3553                 if (rte_eal_alarm_set(100000,
3554                                 rmv_port_callback, (void *)(intptr_t)port_id))
3555                         fprintf(stderr,
3556                                 "Could not set up deferred device removal\n");
3557                 break;
3558         case RTE_ETH_EVENT_DESTROY:
3559                 ports[port_id].port_status = RTE_PORT_CLOSED;
3560                 printf("Port %u is closed\n", port_id);
3561                 break;
3562         default:
3563                 break;
3564         }
3565         return 0;
3566 }
3567
3568 static int
3569 register_eth_event_callback(void)
3570 {
3571         int ret;
3572         enum rte_eth_event_type event;
3573
3574         for (event = RTE_ETH_EVENT_UNKNOWN;
3575                         event < RTE_ETH_EVENT_MAX; event++) {
3576                 ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3577                                 event,
3578                                 eth_event_callback,
3579                                 NULL);
3580                 if (ret != 0) {
3581                         TESTPMD_LOG(ERR, "Failed to register callback for "
3582                                         "%s event\n", eth_event_desc[event]);
3583                         return -1;
3584                 }
3585         }
3586
3587         return 0;
3588 }
3589
3590 /* This function is used by the interrupt thread */
3591 static void
3592 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3593                              __rte_unused void *arg)
3594 {
3595         uint16_t port_id;
3596         int ret;
3597
3598         if (type >= RTE_DEV_EVENT_MAX) {
3599                 fprintf(stderr, "%s called upon invalid event %d\n",
3600                         __func__, type);
3601                 fflush(stderr);
3602         }
3603
3604         switch (type) {
3605         case RTE_DEV_EVENT_REMOVE:
3606                 RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3607                         device_name);
3608                 ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3609                 if (ret) {
3610                         RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3611                                 device_name);
3612                         return;
3613                 }
3614                 /*
3615                  * Because the user's callback is invoked in eal interrupt
3616                  * callback, the interrupt callback need to be finished before
3617                  * it can be unregistered when detaching device. So finish
3618                  * callback soon and use a deferred removal to detach device
3619                  * is need. It is a workaround, once the device detaching be
3620                  * moved into the eal in the future, the deferred removal could
3621                  * be deleted.
3622                  */
3623                 if (rte_eal_alarm_set(100000,
3624                                 rmv_port_callback, (void *)(intptr_t)port_id))
3625                         RTE_LOG(ERR, EAL,
3626                                 "Could not set up deferred device removal\n");
3627                 break;
3628         case RTE_DEV_EVENT_ADD:
3629                 RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3630                         device_name);
3631                 /* TODO: After finish kernel driver binding,
3632                  * begin to attach port.
3633                  */
3634                 break;
3635         default:
3636                 break;
3637         }
3638 }
3639
3640 static void
3641 rxtx_port_config(portid_t pid)
3642 {
3643         uint16_t qid;
3644         uint64_t offloads;
3645         struct rte_port *port = &ports[pid];
3646
3647         for (qid = 0; qid < nb_rxq; qid++) {
3648                 offloads = port->rx_conf[qid].offloads;
3649                 port->rx_conf[qid] = port->dev_info.default_rxconf;
3650
3651                 if (rxq_share > 0 &&
3652                     (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
3653                         /* Non-zero share group to enable RxQ share. */
3654                         port->rx_conf[qid].share_group = pid / rxq_share + 1;
3655                         port->rx_conf[qid].share_qid = qid; /* Equal mapping. */
3656                 }
3657
3658                 if (offloads != 0)
3659                         port->rx_conf[qid].offloads = offloads;
3660
3661                 /* Check if any Rx parameters have been passed */
3662                 if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3663                         port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3664
3665                 if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3666                         port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3667
3668                 if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3669                         port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3670
3671                 if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3672                         port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3673
3674                 if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3675                         port->rx_conf[qid].rx_drop_en = rx_drop_en;
3676
3677                 port->nb_rx_desc[qid] = nb_rxd;
3678         }
3679
3680         for (qid = 0; qid < nb_txq; qid++) {
3681                 offloads = port->tx_conf[qid].offloads;
3682                 port->tx_conf[qid] = port->dev_info.default_txconf;
3683                 if (offloads != 0)
3684                         port->tx_conf[qid].offloads = offloads;
3685
3686                 /* Check if any Tx parameters have been passed */
3687                 if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3688                         port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3689
3690                 if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3691                         port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3692
3693                 if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3694                         port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3695
3696                 if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3697                         port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3698
3699                 if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3700                         port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3701
3702                 port->nb_tx_desc[qid] = nb_txd;
3703         }
3704 }
3705
3706 /*
3707  * Helper function to set MTU from frame size
3708  *
3709  * port->dev_info should be set before calling this function.
3710  *
3711  * return 0 on success, negative on error
3712  */
3713 int
3714 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3715 {
3716         struct rte_port *port = &ports[portid];
3717         uint32_t eth_overhead;
3718         uint16_t mtu, new_mtu;
3719
3720         eth_overhead = get_eth_overhead(&port->dev_info);
3721
3722         if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3723                 printf("Failed to get MTU for port %u\n", portid);
3724                 return -1;
3725         }
3726
3727         new_mtu = max_rx_pktlen - eth_overhead;
3728
3729         if (mtu == new_mtu)
3730                 return 0;
3731
3732         if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3733                 fprintf(stderr,
3734                         "Failed to set MTU to %u for port %u\n",
3735                         new_mtu, portid);
3736                 return -1;
3737         }
3738
3739         port->dev_conf.rxmode.mtu = new_mtu;
3740
3741         return 0;
3742 }
3743
3744 void
3745 init_port_config(void)
3746 {
3747         portid_t pid;
3748         struct rte_port *port;
3749         int ret, i;
3750
3751         RTE_ETH_FOREACH_DEV(pid) {
3752                 port = &ports[pid];
3753                 port->dev_conf.fdir_conf = fdir_conf;
3754
3755                 ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3756                 if (ret != 0)
3757                         return;
3758
3759                 if (nb_rxq > 1) {
3760                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3761                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3762                                 rss_hf & port->dev_info.flow_type_rss_offloads;
3763                 } else {
3764                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3765                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3766                 }
3767
3768                 if (port->dcb_flag == 0) {
3769                         if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3770                                 port->dev_conf.rxmode.mq_mode =
3771                                         (enum rte_eth_rx_mq_mode)
3772                                                 (rx_mq_mode & ETH_MQ_RX_RSS);
3773                         } else {
3774                                 port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3775                                 port->dev_conf.rxmode.offloads &=
3776                                                 ~DEV_RX_OFFLOAD_RSS_HASH;
3777
3778                                 for (i = 0;
3779                                      i < port->dev_info.nb_rx_queues;
3780                                      i++)
3781                                         port->rx_conf[i].offloads &=
3782                                                 ~DEV_RX_OFFLOAD_RSS_HASH;
3783                         }
3784                 }
3785
3786                 rxtx_port_config(pid);
3787
3788                 ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3789                 if (ret != 0)
3790                         return;
3791
3792 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3793                 rte_pmd_ixgbe_bypass_init(pid);
3794 #endif
3795
3796                 if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3797                         port->dev_conf.intr_conf.lsc = 1;
3798                 if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3799                         port->dev_conf.intr_conf.rmv = 1;
3800         }
3801 }
3802
3803 void set_port_slave_flag(portid_t slave_pid)
3804 {
3805         struct rte_port *port;
3806
3807         port = &ports[slave_pid];
3808         port->slave_flag = 1;
3809 }
3810
3811 void clear_port_slave_flag(portid_t slave_pid)
3812 {
3813         struct rte_port *port;
3814
3815         port = &ports[slave_pid];
3816         port->slave_flag = 0;
3817 }
3818
3819 uint8_t port_is_bonding_slave(portid_t slave_pid)
3820 {
3821         struct rte_port *port;
3822         struct rte_eth_dev_info dev_info;
3823         int ret;
3824
3825         port = &ports[slave_pid];
3826         ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3827         if (ret != 0) {
3828                 TESTPMD_LOG(ERR,
3829                         "Failed to get device info for port id %d,"
3830                         "cannot determine if the port is a bonded slave",
3831                         slave_pid);
3832                 return 0;
3833         }
3834         if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3835                 return 1;
3836         return 0;
3837 }
3838
3839 const uint16_t vlan_tags[] = {
3840                 0,  1,  2,  3,  4,  5,  6,  7,
3841                 8,  9, 10, 11,  12, 13, 14, 15,
3842                 16, 17, 18, 19, 20, 21, 22, 23,
3843                 24, 25, 26, 27, 28, 29, 30, 31
3844 };
3845
3846 static  int
3847 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3848                  enum dcb_mode_enable dcb_mode,
3849                  enum rte_eth_nb_tcs num_tcs,
3850                  uint8_t pfc_en)
3851 {
3852         uint8_t i;
3853         int32_t rc;
3854         struct rte_eth_rss_conf rss_conf;
3855
3856         /*
3857          * Builds up the correct configuration for dcb+vt based on the vlan tags array
3858          * given above, and the number of traffic classes available for use.
3859          */
3860         if (dcb_mode == DCB_VT_ENABLED) {
3861                 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3862                                 &eth_conf->rx_adv_conf.vmdq_dcb_conf;
3863                 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3864                                 &eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3865
3866                 /* VMDQ+DCB RX and TX configurations */
3867                 vmdq_rx_conf->enable_default_pool = 0;
3868                 vmdq_rx_conf->default_pool = 0;
3869                 vmdq_rx_conf->nb_queue_pools =
3870                         (num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3871                 vmdq_tx_conf->nb_queue_pools =
3872                         (num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3873
3874                 vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3875                 for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3876                         vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3877                         vmdq_rx_conf->pool_map[i].pools =
3878                                 1 << (i % vmdq_rx_conf->nb_queue_pools);
3879                 }
3880                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3881                         vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3882                         vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3883                 }
3884
3885                 /* set DCB mode of RX and TX of multiple queues */
3886                 eth_conf->rxmode.mq_mode =
3887                                 (enum rte_eth_rx_mq_mode)
3888                                         (rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3889                 eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3890         } else {
3891                 struct rte_eth_dcb_rx_conf *rx_conf =
3892                                 &eth_conf->rx_adv_conf.dcb_rx_conf;
3893                 struct rte_eth_dcb_tx_conf *tx_conf =
3894                                 &eth_conf->tx_adv_conf.dcb_tx_conf;
3895
3896                 memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3897
3898                 rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3899                 if (rc != 0)
3900                         return rc;
3901
3902                 rx_conf->nb_tcs = num_tcs;
3903                 tx_conf->nb_tcs = num_tcs;
3904
3905                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3906                         rx_conf->dcb_tc[i] = i % num_tcs;
3907                         tx_conf->dcb_tc[i] = i % num_tcs;
3908                 }
3909
3910                 eth_conf->rxmode.mq_mode =
3911                                 (enum rte_eth_rx_mq_mode)
3912                                         (rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3913                 eth_conf->rx_adv_conf.rss_conf = rss_conf;
3914                 eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3915         }
3916
3917         if (pfc_en)
3918                 eth_conf->dcb_capability_en =
3919                                 ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3920         else
3921                 eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3922
3923         return 0;
3924 }
3925
3926 int
3927 init_port_dcb_config(portid_t pid,
3928                      enum dcb_mode_enable dcb_mode,
3929                      enum rte_eth_nb_tcs num_tcs,
3930                      uint8_t pfc_en)
3931 {
3932         struct rte_eth_conf port_conf;
3933         struct rte_port *rte_port;
3934         int retval;
3935         uint16_t i;
3936
3937         if (num_procs > 1) {
3938                 printf("The multi-process feature doesn't support dcb.\n");
3939                 return -ENOTSUP;
3940         }
3941         rte_port = &ports[pid];
3942
3943         /* retain the original device configuration. */
3944         memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3945
3946         /*set configuration of DCB in vt mode and DCB in non-vt mode*/
3947         retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3948         if (retval < 0)
3949                 return retval;
3950         port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3951
3952         /* re-configure the device . */
3953         retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3954         if (retval < 0)
3955                 return retval;
3956
3957         retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3958         if (retval != 0)
3959                 return retval;
3960
3961         /* If dev_info.vmdq_pool_base is greater than 0,
3962          * the queue id of vmdq pools is started after pf queues.
3963          */
3964         if (dcb_mode == DCB_VT_ENABLED &&
3965             rte_port->dev_info.vmdq_pool_base > 0) {
3966                 fprintf(stderr,
3967                         "VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3968                         pid);
3969                 return -1;
3970         }
3971
3972         /* Assume the ports in testpmd have the same dcb capability
3973          * and has the same number of rxq and txq in dcb mode
3974          */
3975         if (dcb_mode == DCB_VT_ENABLED) {
3976                 if (rte_port->dev_info.max_vfs > 0) {
3977                         nb_rxq = rte_port->dev_info.nb_rx_queues;
3978                         nb_txq = rte_port->dev_info.nb_tx_queues;
3979                 } else {
3980                         nb_rxq = rte_port->dev_info.max_rx_queues;
3981                         nb_txq = rte_port->dev_info.max_tx_queues;
3982                 }
3983         } else {
3984                 /*if vt is disabled, use all pf queues */
3985                 if (rte_port->dev_info.vmdq_pool_base == 0) {
3986                         nb_rxq = rte_port->dev_info.max_rx_queues;
3987                         nb_txq = rte_port->dev_info.max_tx_queues;
3988                 } else {
3989                         nb_rxq = (queueid_t)num_tcs;
3990                         nb_txq = (queueid_t)num_tcs;
3991
3992                 }
3993         }
3994         rx_free_thresh = 64;
3995
3996         memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3997
3998         rxtx_port_config(pid);
3999         /* VLAN filter */
4000         rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
4001         for (i = 0; i < RTE_DIM(vlan_tags); i++)
4002                 rx_vft_set(pid, vlan_tags[i], 1);
4003
4004         retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4005         if (retval != 0)
4006                 return retval;
4007
4008         rte_port->dcb_flag = 1;
4009
4010         /* Enter DCB configuration status */
4011         dcb_config = 1;
4012
4013         return 0;
4014 }
4015
4016 static void
4017 init_port(void)
4018 {
4019         int i;
4020
4021         /* Configuration of Ethernet ports. */
4022         ports = rte_zmalloc("testpmd: ports",
4023                             sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4024                             RTE_CACHE_LINE_SIZE);
4025         if (ports == NULL) {
4026                 rte_exit(EXIT_FAILURE,
4027                                 "rte_zmalloc(%d struct rte_port) failed\n",
4028                                 RTE_MAX_ETHPORTS);
4029         }
4030         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4031                 ports[i].xstats_info.allocated = false;
4032         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4033                 LIST_INIT(&ports[i].flow_tunnel_list);
4034         /* Initialize ports NUMA structures */
4035         memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4036         memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4037         memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4038 }
4039
4040 static void
4041 force_quit(void)
4042 {
4043         pmd_test_exit();
4044         prompt_exit();
4045 }
4046
4047 static void
4048 print_stats(void)
4049 {
4050         uint8_t i;
4051         const char clr[] = { 27, '[', '2', 'J', '\0' };
4052         const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4053
4054         /* Clear screen and move to top left */
4055         printf("%s%s", clr, top_left);
4056
4057         printf("\nPort statistics ====================================");
4058         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4059                 nic_stats_display(fwd_ports_ids[i]);
4060
4061         fflush(stdout);
4062 }
4063
4064 static void
4065 signal_handler(int signum)
4066 {
4067         if (signum == SIGINT || signum == SIGTERM) {
4068                 fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4069                         signum);
4070 #ifdef RTE_LIB_PDUMP
4071                 /* uninitialize packet capture framework */
4072                 rte_pdump_uninit();
4073 #endif
4074 #ifdef RTE_LIB_LATENCYSTATS
4075                 if (latencystats_enabled != 0)
4076                         rte_latencystats_uninit();
4077 #endif
4078                 force_quit();
4079                 /* Set flag to indicate the force termination. */
4080                 f_quit = 1;
4081                 /* exit with the expected status */
4082 #ifndef RTE_EXEC_ENV_WINDOWS
4083                 signal(signum, SIG_DFL);
4084                 kill(getpid(), signum);
4085 #endif
4086         }
4087 }
4088
4089 int
4090 main(int argc, char** argv)
4091 {
4092         int diag;
4093         portid_t port_id;
4094         uint16_t count;
4095         int ret;
4096
4097         signal(SIGINT, signal_handler);
4098         signal(SIGTERM, signal_handler);
4099
4100         testpmd_logtype = rte_log_register("testpmd");
4101         if (testpmd_logtype < 0)
4102                 rte_exit(EXIT_FAILURE, "Cannot register log type");
4103         rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4104
4105         diag = rte_eal_init(argc, argv);
4106         if (diag < 0)
4107                 rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4108                          rte_strerror(rte_errno));
4109
4110         ret = register_eth_event_callback();
4111         if (ret != 0)
4112                 rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
4113
4114 #ifdef RTE_LIB_PDUMP
4115         /* initialize packet capture framework */
4116         rte_pdump_init();
4117 #endif
4118
4119         count = 0;
4120         RTE_ETH_FOREACH_DEV(port_id) {
4121                 ports_ids[count] = port_id;
4122                 count++;
4123         }
4124         nb_ports = (portid_t) count;
4125         if (nb_ports == 0)
4126                 TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4127
4128         /* allocate port structures, and init them */
4129         init_port();
4130
4131         set_def_fwd_config();
4132         if (nb_lcores == 0)
4133                 rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4134                          "Check the core mask argument\n");
4135
4136         /* Bitrate/latency stats disabled by default */
4137 #ifdef RTE_LIB_BITRATESTATS
4138         bitrate_enabled = 0;
4139 #endif
4140 #ifdef RTE_LIB_LATENCYSTATS
4141         latencystats_enabled = 0;
4142 #endif
4143
4144         /* on FreeBSD, mlockall() is disabled by default */
4145 #ifdef RTE_EXEC_ENV_FREEBSD
4146         do_mlockall = 0;
4147 #else
4148         do_mlockall = 1;
4149 #endif
4150
4151         argc -= diag;
4152         argv += diag;
4153         if (argc > 1)
4154                 launch_args_parse(argc, argv);
4155
4156 #ifndef RTE_EXEC_ENV_WINDOWS
4157         if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4158                 TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4159                         strerror(errno));
4160         }
4161 #endif
4162
4163         if (tx_first && interactive)
4164                 rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
4165                                 "interactive mode.\n");
4166
4167         if (tx_first && lsc_interrupt) {
4168                 fprintf(stderr,
4169                         "Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4170                 lsc_interrupt = 0;
4171         }
4172
4173         if (!nb_rxq && !nb_txq)
4174                 fprintf(stderr,
4175                         "Warning: Either rx or tx queues should be non-zero\n");
4176
4177         if (nb_rxq > 1 && nb_rxq > nb_txq)
4178                 fprintf(stderr,
4179                         "Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent to fully test it.\n",
4180                         nb_rxq, nb_txq);
4181
4182         init_config();
4183
4184         if (hot_plug) {
4185                 ret = rte_dev_hotplug_handle_enable();
4186                 if (ret) {
4187                         RTE_LOG(ERR, EAL,
4188                                 "fail to enable hotplug handling.");
4189                         return -1;
4190                 }
4191
4192                 ret = rte_dev_event_monitor_start();
4193                 if (ret) {
4194                         RTE_LOG(ERR, EAL,
4195                                 "fail to start device event monitoring.");
4196                         return -1;
4197                 }
4198
4199                 ret = rte_dev_event_callback_register(NULL,
4200                         dev_event_callback, NULL);
4201                 if (ret) {
4202                         RTE_LOG(ERR, EAL,
4203                                 "fail  to register device event callback\n");
4204                         return -1;
4205                 }
4206         }
4207
4208         if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4209                 rte_exit(EXIT_FAILURE, "Start ports failed\n");
4210
4211         /* set all ports to promiscuous mode by default */
4212         RTE_ETH_FOREACH_DEV(port_id) {
4213                 ret = rte_eth_promiscuous_enable(port_id);
4214                 if (ret != 0)
4215                         fprintf(stderr,
4216                                 "Error during enabling promiscuous mode for port %u: %s - ignore\n",
4217                                 port_id, rte_strerror(-ret));
4218         }
4219
4220         /* Init metrics library */
4221         rte_metrics_init(rte_socket_id());
4222
4223 #ifdef RTE_LIB_LATENCYSTATS
4224         if (latencystats_enabled != 0) {
4225                 int ret = rte_latencystats_init(1, NULL);
4226                 if (ret)
4227                         fprintf(stderr,
4228                                 "Warning: latencystats init() returned error %d\n",
4229                                 ret);
4230                 fprintf(stderr, "Latencystats running on lcore %d\n",
4231                         latencystats_lcore_id);
4232         }
4233 #endif
4234
4235         /* Setup bitrate stats */
4236 #ifdef RTE_LIB_BITRATESTATS
4237         if (bitrate_enabled != 0) {
4238                 bitrate_data = rte_stats_bitrate_create();
4239                 if (bitrate_data == NULL)
4240                         rte_exit(EXIT_FAILURE,
4241                                 "Could not allocate bitrate data.\n");
4242                 rte_stats_bitrate_reg(bitrate_data);
4243         }
4244 #endif
4245 #ifdef RTE_LIB_CMDLINE
4246         if (strlen(cmdline_filename) != 0)
4247                 cmdline_read_from_file(cmdline_filename);
4248
4249         if (interactive == 1) {
4250                 if (auto_start) {
4251                         printf("Start automatic packet forwarding\n");
4252                         start_packet_forwarding(0);
4253                 }
4254                 prompt();
4255                 pmd_test_exit();
4256         } else
4257 #endif
4258         {
4259                 char c;
4260                 int rc;
4261
4262                 f_quit = 0;
4263
4264                 printf("No commandline core given, start packet forwarding\n");
4265                 start_packet_forwarding(tx_first);
4266                 if (stats_period != 0) {
4267                         uint64_t prev_time = 0, cur_time, diff_time = 0;
4268                         uint64_t timer_period;
4269
4270                         /* Convert to number of cycles */
4271                         timer_period = stats_period * rte_get_timer_hz();
4272
4273                         while (f_quit == 0) {
4274                                 cur_time = rte_get_timer_cycles();
4275                                 diff_time += cur_time - prev_time;
4276
4277                                 if (diff_time >= timer_period) {
4278                                         print_stats();
4279                                         /* Reset the timer */
4280                                         diff_time = 0;
4281                                 }
4282                                 /* Sleep to avoid unnecessary checks */
4283                                 prev_time = cur_time;
4284                                 rte_delay_us_sleep(US_PER_S);
4285                         }
4286                 }
4287
4288                 printf("Press enter to exit\n");
4289                 rc = read(0, &c, 1);
4290                 pmd_test_exit();
4291                 if (rc < 0)
4292                         return 1;
4293         }
4294
4295         ret = rte_eal_cleanup();
4296         if (ret != 0)
4297                 rte_exit(EXIT_FAILURE,
4298                          "EAL cleanup failed: %s\n", strerror(-ret));
4299
4300         return EXIT_SUCCESS;
4301 }