/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#ifndef RTE_EXEC_ENV_WINDOWS
#include <sys/mman.h>
#endif
#include <sys/types.h>
#include <errno.h>
#include <stdbool.h>

#include <sys/queue.h>
#include <sys/stat.h>

#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_common.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_alarm.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mbuf_pool_ops.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_dev.h>
#include <rte_string_fns.h>
#ifdef RTE_NET_IXGBE
#include <rte_pmd_ixgbe.h>
#endif
#ifdef RTE_LIB_PDUMP
#include <rte_pdump.h>
#endif
#include <rte_flow.h>
#include <rte_metrics.h>
#ifdef RTE_LIB_BITRATESTATS
#include <rte_bitrate.h>
#endif
#ifdef RTE_LIB_LATENCYSTATS
#include <rte_latencystats.h>
#endif
#ifdef RTE_EXEC_ENV_WINDOWS
#include <process.h>
#endif

#include "testpmd.h"

#ifndef MAP_HUGETLB
/* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
#define HUGE_FLAG (0x40000)
#else
#define HUGE_FLAG MAP_HUGETLB
#endif

#ifndef MAP_HUGE_SHIFT
/* older kernels (or FreeBSD) will not have this define */
#define HUGE_SHIFT (26)
#else
#define HUGE_SHIFT MAP_HUGE_SHIFT
#endif

#define EXTMEM_HEAP_NAME "extmem"
#define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M

uint16_t verbose_level = 0; /**< Silent by default. */
int testpmd_logtype; /**< Log type for testpmd logs */

/* use main core for command line ? */
uint8_t interactive = 0;
uint8_t auto_start = 0;
uint8_t tx_first;
char cmdline_filename[PATH_MAX] = {0};

/*
 * NUMA support configuration.
 * When set, the NUMA support attempts to dispatch the allocation of the
 * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
 * probed ports among the CPU sockets 0 and 1.
 * Otherwise, all memory is allocated from CPU socket 0.
 */
uint8_t numa_support = 1; /**< numa enabled by default */

/*
 * In UMA mode, all memory is allocated from socket 0 if --socket-num is
 * not configured.
 */
uint8_t socket_num = UMA_NO_CONFIG;

/*
 * Select mempool allocation type:
 * - native: use regular DPDK memory
 * - anon: use regular DPDK memory to create mempool, but populate using
 *         anonymous memory (may not be IOVA-contiguous)
 * - xmem: use externally allocated hugepage memory
 */
uint8_t mp_alloc_type = MP_ALLOC_NATIVE;

/*
 * Store the specified sockets on which the memory pools used by
 * the ports are allocated.
 */
uint8_t port_numa[RTE_MAX_ETHPORTS];

/*
 * Store the specified sockets on which the RX rings used by
 * the ports are allocated.
 */
uint8_t rxring_numa[RTE_MAX_ETHPORTS];

/*
 * Store the specified sockets on which the TX rings used by
 * the ports are allocated.
 */
uint8_t txring_numa[RTE_MAX_ETHPORTS];

/*
 * Record the Ethernet address of peer target ports to which packets are
 * forwarded.
 * Must be instantiated with the Ethernet addresses of peer traffic
 * generator ports.
 */
struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
portid_t nb_peer_eth_addrs = 0;

/*
 * Probed Target Environment.
 */
struct rte_port *ports;        /**< For all probed ethernet ports. */
portid_t nb_ports;             /**< Number of probed ethernet ports. */
struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
lcoreid_t nb_lcores;           /**< Number of probed logical cores. */

portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */

/*
 * Test Forwarding Configuration.
 *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
 *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
 */
lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
portid_t  nb_cfg_ports;  /**< Number of configured ports. */
portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */

unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */

struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */

/*
 * Forwarding engines.
 */
struct fwd_engine * fwd_engines[] = {
        &io_fwd_engine,
        &mac_fwd_engine,
        &mac_swap_engine,
        &flow_gen_engine,
        &rx_only_engine,
        &tx_only_engine,
        &csum_fwd_engine,
        &icmp_echo_engine,
        &noisy_vnf_engine,
        &five_tuple_swap_fwd_engine,
#ifdef RTE_LIBRTE_IEEE1588
        &ieee1588_fwd_engine,
#endif
        NULL,
};

struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
uint16_t mempool_flags;

struct fwd_config cur_fwd_config;
struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
uint32_t retry_enabled;
uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
uint32_t burst_tx_retry_num = BURST_TX_RETRIES;

uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
        DEFAULT_MBUF_DATA_SIZE
}; /**< Mbuf data space size. */
uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
                                      * specified on command-line. */
uint16_t stats_period; /**< Period to show statistics (disabled by default) */

/** Extended statistics to show. */
struct rte_eth_xstat_name *xstats_display;

unsigned int xstats_display_num; /**< Size of extended statistics to show */

/*
 * In a container, the process running with the 'stats-period' option cannot
 * be terminated. Set a flag to exit the stats-period loop once
 * SIGINT/SIGTERM is received.
 */
uint8_t f_quit;

/*
 * Max Rx frame size, set by '--max-pkt-len' parameter.
 */
uint32_t max_rx_pkt_len;

/*
 * Configuration of packet segments used to scatter received packets
 * if any of the split features is configured.
 */
uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */

/*
 * Configuration of packet segments used by the "txonly" processing engine.
 */
uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
        TXONLY_DEF_PACKET_LEN,
};
uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */

enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
/**< Split policy for packets to TX. */

uint8_t txonly_multi_flow;
/**< Whether multiple flows are generated in TXONLY mode. */

uint32_t tx_pkt_times_inter;
/**< Timings for send scheduling in TXONLY mode, time between bursts. */

uint32_t tx_pkt_times_intra;
/**< Timings for send scheduling in TXONLY mode, time between packets. */

uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */

/* Whether the current configuration is in DCB mode; 0 means it is not. */
uint8_t dcb_config = 0;

/*
 * Configurable number of RX/TX queues.
 */
queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
queueid_t nb_txq = 1; /**< Number of TX queues per port. */

/*
 * Configurable number of RX/TX ring descriptors.
 * Defaults are supplied by drivers via ethdev.
 */
#define RTE_TEST_RX_DESC_DEFAULT 0
#define RTE_TEST_TX_DESC_DEFAULT 0
uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */

#define RTE_PMD_PARAM_UNSET -1
/*
 * Configurable values of RX and TX ring threshold registers.
 */

int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;

int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of RX free threshold.
 */
int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of RX drop enable.
 */
int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of TX free threshold.
 */
int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of TX RS bit threshold.
 */
int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;

/*
 * Configurable value of buffered packets before sending.
 */
uint16_t noisy_tx_sw_bufsz;

/*
 * Configurable value of packet buffer timeout.
 */
uint16_t noisy_tx_sw_buf_flush_time;

/*
 * Configurable value for size of VNF internal memory area
 * used for simulating noisy neighbour behaviour
 */
uint64_t noisy_lkup_mem_sz;

/*
 * Configurable value of number of random writes done in
 * VNF simulation memory area.
 */
uint64_t noisy_lkup_num_writes;

/*
 * Configurable value of number of random reads done in
 * VNF simulation memory area.
 */
uint64_t noisy_lkup_num_reads;

/*
 * Configurable value of number of random reads/writes done in
 * VNF simulation memory area.
 */
uint64_t noisy_lkup_num_reads_writes;

/*
 * Receive Side Scaling (RSS) configuration.
 */
uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */

/*
 * Port topology configuration
 */
uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */

/*
 * Avoid flushing all the RX streams before starting forwarding.
 */
uint8_t no_flush_rx = 0; /* flush by default */

/*
 * Flow API isolated mode.
 */
uint8_t flow_isolate_all;

/*
 * Avoid checking link status when starting/stopping a port.
 */
uint8_t no_link_check = 0; /* check by default */

/*
 * Don't automatically start all ports in interactive mode.
 */
uint8_t no_device_start = 0;

/*
 * Enable link status change notification
 */
uint8_t lsc_interrupt = 1; /* enabled by default */

/*
 * Enable device removal notification.
 */
uint8_t rmv_interrupt = 1; /* enabled by default */

uint8_t hot_plug = 0; /**< hotplug disabled by default. */

/* After attach, port setup is called on event or by iterator */
bool setup_on_probe_event = true;

/* Clear ptypes on port initialization. */
uint8_t clear_ptypes = true;

/* Hairpin ports configuration mode. */
uint16_t hairpin_mode;

/* Pretty printing of ethdev events */
static const char * const eth_event_desc[] = {
        [RTE_ETH_EVENT_UNKNOWN] = "unknown",
        [RTE_ETH_EVENT_INTR_LSC] = "link state change",
        [RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
        [RTE_ETH_EVENT_INTR_RESET] = "reset",
        [RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
        [RTE_ETH_EVENT_IPSEC] = "IPsec",
        [RTE_ETH_EVENT_MACSEC] = "MACsec",
        [RTE_ETH_EVENT_INTR_RMV] = "device removal",
        [RTE_ETH_EVENT_NEW] = "device probed",
        [RTE_ETH_EVENT_DESTROY] = "device released",
        [RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
        [RTE_ETH_EVENT_MAX] = NULL,
};

/*
 * Display or mask ether events
 * Default to all events except VF_MBOX
 */
uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
                            (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
                            (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
                            (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
                            (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
                            (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
                            (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
                            (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
/*
 * Decide if all memory is locked for performance.
 */
int do_mlockall = 0;

/*
 * NIC bypass mode configuration options.
 */

#if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
/* The NIC bypass watchdog timeout. */
uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
#endif


#ifdef RTE_LIB_LATENCYSTATS

/*
 * Set when latency stats is enabled in the commandline
 */
uint8_t latencystats_enabled;

/*
 * Lcore ID to service latency statistics.
 */
lcoreid_t latencystats_lcore_id = -1;

#endif

/*
 * Ethernet device configuration.
 */
struct rte_eth_rxmode rx_mode;

struct rte_eth_txmode tx_mode = {
        .offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
};

struct rte_fdir_conf fdir_conf = {
        .mode = RTE_FDIR_MODE_NONE,
        .pballoc = RTE_FDIR_PBALLOC_64K,
        .status = RTE_FDIR_REPORT_STATUS,
        .mask = {
                .vlan_tci_mask = 0xFFEF,
                .ipv4_mask     = {
                        .src_ip = 0xFFFFFFFF,
                        .dst_ip = 0xFFFFFFFF,
                },
                .ipv6_mask     = {
                        .src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
                        .dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
                },
                .src_port_mask = 0xFFFF,
                .dst_port_mask = 0xFFFF,
                .mac_addr_byte_mask = 0xFF,
                .tunnel_type_mask = 1,
                .tunnel_id_mask = 0xFFFFFFFF,
        },
        .drop_queue = 127,
};

volatile int test_done = 1; /* stop packet forwarding when set to 1. */

/*
 * Display zero values by default for xstats
 */
uint8_t xstats_hide_zero;

/*
 * Measure of CPU cycles disabled by default
 */
uint8_t record_core_cycles;

/*
 * Display of RX and TX bursts disabled by default
 */
uint8_t record_burst_stats;

/*
 * Number of ports per shared Rx queue group, 0 to disable.
 */
uint32_t rxq_share;

unsigned int num_sockets = 0;
unsigned int socket_ids[RTE_MAX_NUMA_NODES];

#ifdef RTE_LIB_BITRATESTATS
/* Bitrate statistics */
struct rte_stats_bitrates *bitrate_data;
lcoreid_t bitrate_lcore_id;
uint8_t bitrate_enabled;
#endif

struct gro_status gro_ports[RTE_MAX_ETHPORTS];
uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;

/*
 * Hexadecimal bitmask of RX mq modes that can be enabled.
 */
enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;

/*
 * Used to set forced link speed
 */
uint32_t eth_link_speed;

/*
 * ID of the current process in multi-process, used to
 * configure the queues to be polled.
 */
int proc_id;

/*
 * Number of processes in multi-process, used to
 * configure the queues to be polled.
 */
unsigned int num_procs = 1;

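/*
 * Negotiate delivery of Rx metadata (flow flag, flow mark, tunnel ID)
 * with the PMD. Only the primary process negotiates; any failure other
 * than -ENOTSUP is fatal.
 */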
static void
eth_rx_metadata_negotiate_mp(uint16_t port_id)
{
        uint64_t rx_meta_features = 0;
        int ret;

        if (!is_proc_primary())
                return;

        rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
        rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
        rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;

        ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
        if (ret == 0) {
                if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
                        TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
                                    port_id);
                }

                if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
                        TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
                                    port_id);
                }

                if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
                        TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
                                    port_id);
                }
        } else if (ret != -ENOTSUP) {
                rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
                         port_id, rte_strerror(-ret));
        }
}

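/*
 * Pick the port that acts as the flow transfer proxy for this port.
 * Defaults to the port itself; only the primary process queries the PMD,
 * and a failure is reported but ignored.
 */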
static void
flow_pick_transfer_proxy_mp(uint16_t port_id)
{
        struct rte_port *port = &ports[port_id];
        int ret;

        port->flow_transfer_proxy = port_id;

        if (!is_proc_primary())
                return;

        ret = rte_flow_pick_transfer_proxy(port_id, &port->flow_transfer_proxy,
                                           NULL);
        if (ret != 0) {
                fprintf(stderr, "Error picking flow transfer proxy for port %u: %s - ignore\n",
                        port_id, rte_strerror(-ret));
        }
}

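/*
 * Multi-process wrappers for device control operations: only the primary
 * process performs them, secondary processes treat the calls as
 * successful no-ops.
 */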
static int
eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
                      const struct rte_eth_conf *dev_conf)
{
        if (is_proc_primary())
                return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
                                        dev_conf);
        return 0;
}

static int
eth_dev_start_mp(uint16_t port_id)
{
        if (is_proc_primary())
                return rte_eth_dev_start(port_id);

        return 0;
}

static int
eth_dev_stop_mp(uint16_t port_id)
{
        if (is_proc_primary())
                return rte_eth_dev_stop(port_id);

        return 0;
}

static void
mempool_free_mp(struct rte_mempool *mp)
{
        if (is_proc_primary())
                rte_mempool_free(mp);
}

static int
eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
{
        if (is_proc_primary())
                return rte_eth_dev_set_mtu(port_id, mtu);

        return 0;
}

/* Forward function declarations */
static void setup_attached_port(portid_t pi);
static void check_all_ports_link_status(uint32_t port_mask);
static int eth_event_callback(portid_t port_id,
                              enum rte_eth_event_type type,
                              void *param, void *ret_param);
static void dev_event_callback(const char *device_name,
                                enum rte_dev_event_type type,
                                void *param);
static void fill_xstats_display_info(void);

/*
 * Check if all the ports are started.
 * If yes, return positive value. If not, return zero.
 */
static int all_ports_started(void);

struct gso_status gso_ports[RTE_MAX_ETHPORTS];
uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;

/* Holds the registered mbuf dynamic flags names. */
char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];


/*
 * Helper function to check whether a socket id is new (not yet discovered).
 * If it is new, return a positive value. If already known, return zero.
 */
int
new_socket_id(unsigned int socket_id)
{
        unsigned int i;

        for (i = 0; i < num_sockets; i++) {
                if (socket_ids[i] == socket_id)
                        return 0;
        }
        return 1;
}

/*
 * Setup default configuration.
 */
static void
set_default_fwd_lcores_config(void)
{
        unsigned int i;
        unsigned int nb_lc;
        unsigned int sock_num;

        nb_lc = 0;
        for (i = 0; i < RTE_MAX_LCORE; i++) {
                if (!rte_lcore_is_enabled(i))
                        continue;
                sock_num = rte_lcore_to_socket_id(i);
                if (new_socket_id(sock_num)) {
                        if (num_sockets >= RTE_MAX_NUMA_NODES) {
                                rte_exit(EXIT_FAILURE,
                                         "Total sockets greater than %u\n",
                                         RTE_MAX_NUMA_NODES);
                        }
                        socket_ids[num_sockets++] = sock_num;
                }
                if (i == rte_get_main_lcore())
                        continue;
                fwd_lcores_cpuids[nb_lc++] = i;
        }
        nb_lcores = (lcoreid_t) nb_lc;
        nb_cfg_lcores = nb_lcores;
        nb_fwd_lcores = 1;
}

static void
set_def_peer_eth_addrs(void)
{
        portid_t i;

        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
                peer_eth_addrs[i].addr_bytes[5] = i;
        }
}

static void
set_default_fwd_ports_config(void)
{
        portid_t pt_id;
        int i = 0;

        RTE_ETH_FOREACH_DEV(pt_id) {
                fwd_ports_ids[i++] = pt_id;

                /* Update sockets info according to the attached device */
                int socket_id = rte_eth_dev_socket_id(pt_id);
                if (socket_id >= 0 && new_socket_id(socket_id)) {
                        if (num_sockets >= RTE_MAX_NUMA_NODES) {
                                rte_exit(EXIT_FAILURE,
                                         "Total sockets greater than %u\n",
                                         RTE_MAX_NUMA_NODES);
                        }
                        socket_ids[num_sockets++] = socket_id;
                }
        }

        nb_cfg_ports = nb_ports;
        nb_fwd_ports = nb_ports;
}

void
set_def_fwd_config(void)
{
        set_default_fwd_lcores_config();
        set_def_peer_eth_addrs();
        set_default_fwd_ports_config();
}

#ifndef RTE_EXEC_ENV_WINDOWS
/* extremely pessimistic estimation of memory required to create a mempool */
static int
calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
{
        unsigned int n_pages, mbuf_per_pg, leftover;
        uint64_t total_mem, mbuf_mem, obj_sz;

        /* there is no good way to predict how much space the mempool will
         * occupy because it will allocate chunks on the fly, and some of those
         * will come from default DPDK memory while some will come from our
         * external memory, so just assume 128MB will be enough for everyone.
         */
        uint64_t hdr_mem = 128 << 20;

        /* account for possible non-contiguousness */
        obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
        if (obj_sz > pgsz) {
                TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
                return -1;
        }

        mbuf_per_pg = pgsz / obj_sz;
        leftover = (nb_mbufs % mbuf_per_pg) > 0;
        n_pages = (nb_mbufs / mbuf_per_pg) + leftover;

        mbuf_mem = n_pages * pgsz;

        total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);

        if (total_mem > SIZE_MAX) {
                TESTPMD_LOG(ERR, "Memory size too big\n");
                return -1;
        }
        *out = (size_t)total_mem;

        return 0;
}

static int
pagesz_flags(uint64_t page_sz)
{
        /* as per mmap() manpage, all page sizes are log2 of page size
         * shifted by MAP_HUGE_SHIFT
         */
        int log2 = rte_log2_u64(page_sz);

        return (log2 << HUGE_SHIFT);
}

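/*
 * Reserve an anonymous memory area of the requested size, optionally
 * backed by hugepages of the given page size. Returns NULL on failure.
 */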
static void *
alloc_mem(size_t memsz, size_t pgsz, bool huge)
{
        void *addr;
        int flags;

        /* allocate anonymous hugepages */
        flags = MAP_ANONYMOUS | MAP_PRIVATE;
        if (huge)
                flags |= HUGE_FLAG | pagesz_flags(pgsz);

        addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
        if (addr == MAP_FAILED)
                return NULL;

        return addr;
}

struct extmem_param {
        void *addr;
        size_t len;
        size_t pgsz;
        rte_iova_t *iova_table;
        unsigned int iova_table_len;
};

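/*
 * Allocate an external memory area: try each candidate page size until an
 * anonymous mapping succeeds, then build the IOVA table by touching every
 * page. Fills in *param on success.
 */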
static int
create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
                bool huge)
{
        uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
                        RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
        unsigned int cur_page, n_pages, pgsz_idx;
        size_t mem_sz, cur_pgsz;
        rte_iova_t *iovas = NULL;
        void *addr;
        int ret;

        for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
                /* skip anything that is too big */
                if (pgsizes[pgsz_idx] > SIZE_MAX)
                        continue;

                cur_pgsz = pgsizes[pgsz_idx];

                /* if we were told not to allocate hugepages, override */
                if (!huge)
                        cur_pgsz = sysconf(_SC_PAGESIZE);

                ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
                if (ret < 0) {
                        TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
                        return -1;
                }

                /* allocate our memory */
                addr = alloc_mem(mem_sz, cur_pgsz, huge);

                /* if we couldn't allocate memory with a specified page size,
                 * that doesn't mean we can't do it with other page sizes, so
                 * try another one.
                 */
                if (addr == NULL)
                        continue;

                /* store IOVA addresses for every page in this memory area */
                n_pages = mem_sz / cur_pgsz;

                iovas = malloc(sizeof(*iovas) * n_pages);

                if (iovas == NULL) {
                        TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
                        goto fail;
                }
                /* lock memory if it's not huge pages */
                if (!huge)
                        mlock(addr, mem_sz);

                /* populate IOVA addresses */
                for (cur_page = 0; cur_page < n_pages; cur_page++) {
                        rte_iova_t iova;
                        size_t offset;
                        void *cur;

                        offset = cur_pgsz * cur_page;
                        cur = RTE_PTR_ADD(addr, offset);

                        /* touch the page before getting its IOVA */
                        *(volatile char *)cur = 0;

                        iova = rte_mem_virt2iova(cur);

                        iovas[cur_page] = iova;
                }

                break;
        }
        /* if we couldn't allocate anything */
        if (iovas == NULL)
                return -1;

        param->addr = addr;
        param->len = mem_sz;
        param->pgsz = cur_pgsz;
        param->iova_table = iovas;
        param->iova_table_len = n_pages;

        return 0;
fail:
        if (iovas)
                free(iovas);
        if (addr)
                munmap(addr, mem_sz);

        return -1;
}

static int
setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
{
        struct extmem_param param;
        int socket_id, ret;

        memset(&param, 0, sizeof(param));

        /* check if our heap exists */
        socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
        if (socket_id < 0) {
                /* create our heap */
                ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
                if (ret < 0) {
                        TESTPMD_LOG(ERR, "Cannot create heap\n");
                        return -1;
                }
        }

        ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
        if (ret < 0) {
                TESTPMD_LOG(ERR, "Cannot create memory area\n");
                return -1;
        }

        /* we now have a valid memory area, so add it to heap */
        ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
                        param.addr, param.len, param.iova_table,
                        param.iova_table_len, param.pgsz);

        /* when using VFIO, memory is automatically mapped for DMA by EAL */

        /* not needed any more */
        free(param.iova_table);

        if (ret < 0) {
                TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
                munmap(param.addr, param.len);
                return -1;
        }

        /* success */

        TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
                        param.len >> 20);

        return 0;
}
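
/*
 * Mempool memory-area callback: DMA-unmap the area from every probed port
 * and un-register it from DPDK.
 */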
static void
dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
             struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
{
        uint16_t pid = 0;
        int ret;

        RTE_ETH_FOREACH_DEV(pid) {
                struct rte_eth_dev_info dev_info;

                ret = eth_dev_info_get_print_err(pid, &dev_info);
                if (ret != 0) {
                        TESTPMD_LOG(DEBUG,
                                    "unable to get device info for port %d on addr 0x%p, "
                                    "mempool unmapping will not be performed\n",
                                    pid, memhdr->addr);
                        continue;
                }

                ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
                if (ret) {
                        TESTPMD_LOG(DEBUG,
                                    "unable to DMA unmap addr 0x%p "
                                    "for device %s\n",
                                    memhdr->addr, dev_info.device->name);
                }
        }
        ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
        if (ret) {
                TESTPMD_LOG(DEBUG,
                            "unable to un-register addr 0x%p\n", memhdr->addr);
        }
}
1007
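/*
 * Mempool memory-area callback: register the externally allocated area
 * with DPDK and DMA-map it for every probed port.
 */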
static void
dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
           struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
{
        uint16_t pid = 0;
        size_t page_size = sysconf(_SC_PAGESIZE);
        int ret;

        ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
                                  page_size);
        if (ret) {
                TESTPMD_LOG(DEBUG,
                            "unable to register addr 0x%p\n", memhdr->addr);
                return;
        }
        RTE_ETH_FOREACH_DEV(pid) {
                struct rte_eth_dev_info dev_info;

                ret = eth_dev_info_get_print_err(pid, &dev_info);
                if (ret != 0) {
                        TESTPMD_LOG(DEBUG,
                                    "unable to get device info for port %d on addr 0x%p, "
                                    "mempool mapping will not be performed\n",
                                    pid, memhdr->addr);
                        continue;
                }
                ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
                if (ret) {
                        TESTPMD_LOG(DEBUG,
                                    "unable to DMA map addr 0x%p "
                                    "for device %s\n",
                                    memhdr->addr, dev_info.device->name);
                }
        }
}
#endif

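/*
 * Build an array of external buffer descriptors for a pinned-buffer mbuf
 * pool. Each descriptor is backed by an IOVA-contiguous memzone of
 * EXTBUF_ZONE_SIZE bytes. Returns the number of descriptors, or 0 on
 * failure with errno set.
 */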
static unsigned int
setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
            char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
{
        struct rte_pktmbuf_extmem *xmem;
        unsigned int ext_num, zone_num, elt_num;
        uint16_t elt_size;

        elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
        elt_num = EXTBUF_ZONE_SIZE / elt_size;
        zone_num = (nb_mbufs + elt_num - 1) / elt_num;

        xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
        if (xmem == NULL) {
                TESTPMD_LOG(ERR, "Cannot allocate memory for "
                                 "external buffer descriptors\n");
                *ext_mem = NULL;
                return 0;
        }
        for (ext_num = 0; ext_num < zone_num; ext_num++) {
                struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
                const struct rte_memzone *mz;
                char mz_name[RTE_MEMZONE_NAMESIZE];
                int ret;

                ret = snprintf(mz_name, sizeof(mz_name),
                        RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
                if (ret < 0 || ret >= (int)sizeof(mz_name)) {
                        errno = ENAMETOOLONG;
                        ext_num = 0;
                        break;
                }
                mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
                                                 socket_id,
                                                 RTE_MEMZONE_IOVA_CONTIG |
                                                 RTE_MEMZONE_1GB |
                                                 RTE_MEMZONE_SIZE_HINT_ONLY,
                                                 EXTBUF_ZONE_SIZE);
                if (mz == NULL) {
                        /*
                         * The caller exits on external buffer creation
                         * error, so there is no need to free memzones.
                         */
                        errno = ENOMEM;
                        ext_num = 0;
                        break;
                }
                xseg->buf_ptr = mz->addr;
                xseg->buf_iova = mz->iova;
                xseg->buf_len = EXTBUF_ZONE_SIZE;
                xseg->elt_size = elt_size;
        }
        if (ext_num == 0 && xmem != NULL) {
                free(xmem);
                xmem = NULL;
        }
        *ext_mem = xmem;
        return ext_num;
}

/*
 * Mbuf pool creation, done once at init time for each socket and
 * segment size.
 */
static struct rte_mempool *
mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
                 unsigned int socket_id, uint16_t size_idx)
{
        char pool_name[RTE_MEMPOOL_NAMESIZE];
        struct rte_mempool *rte_mp = NULL;
#ifndef RTE_EXEC_ENV_WINDOWS
        uint32_t mb_size;

        mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
#endif
        mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
        if (!is_proc_primary()) {
                rte_mp = rte_mempool_lookup(pool_name);
                if (rte_mp == NULL)
                        rte_exit(EXIT_FAILURE,
                                "Get mbuf pool for socket %u failed: %s\n",
                                socket_id, rte_strerror(rte_errno));
                return rte_mp;
        }

        TESTPMD_LOG(INFO,
                "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
                pool_name, nb_mbuf, mbuf_seg_size, socket_id);

        switch (mp_alloc_type) {
        case MP_ALLOC_NATIVE:
                {
                        /* wrapper to rte_mempool_create() */
                        TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
                                        rte_mbuf_best_mempool_ops());
                        rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
                                mb_mempool_cache, 0, mbuf_seg_size, socket_id);
                        break;
                }
#ifndef RTE_EXEC_ENV_WINDOWS
        case MP_ALLOC_ANON:
                {
                        rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
                                mb_size, (unsigned int) mb_mempool_cache,
                                sizeof(struct rte_pktmbuf_pool_private),
                                socket_id, mempool_flags);
                        if (rte_mp == NULL)
                                goto err;

                        if (rte_mempool_populate_anon(rte_mp) == 0) {
                                rte_mempool_free(rte_mp);
                                rte_mp = NULL;
                                goto err;
                        }
                        rte_pktmbuf_pool_init(rte_mp, NULL);
                        rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
                        rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
                        break;
                }
        case MP_ALLOC_XMEM:
        case MP_ALLOC_XMEM_HUGE:
                {
                        int heap_socket;
                        bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;

                        if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
                                rte_exit(EXIT_FAILURE, "Could not create external memory\n");

                        heap_socket =
                                rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
                        if (heap_socket < 0)
                                rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");

                        TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
                                        rte_mbuf_best_mempool_ops());
                        rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
                                        mb_mempool_cache, 0, mbuf_seg_size,
                                        heap_socket);
                        break;
                }
#endif
        case MP_ALLOC_XBUF:
                {
                        struct rte_pktmbuf_extmem *ext_mem;
                        unsigned int ext_num;

                        ext_num = setup_extbuf(nb_mbuf, mbuf_seg_size,
                                               socket_id, pool_name, &ext_mem);
                        if (ext_num == 0)
                                rte_exit(EXIT_FAILURE,
                                         "Can't create pinned data buffers\n");

                        TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
                                        rte_mbuf_best_mempool_ops());
                        rte_mp = rte_pktmbuf_pool_create_extbuf
                                        (pool_name, nb_mbuf, mb_mempool_cache,
                                         0, mbuf_seg_size, socket_id,
                                         ext_mem, ext_num);
                        free(ext_mem);
                        break;
                }
        default:
                {
                        rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
                }
        }

#ifndef RTE_EXEC_ENV_WINDOWS
err:
#endif
        if (rte_mp == NULL) {
                rte_exit(EXIT_FAILURE,
                        "Creation of mbuf pool for socket %u failed: %s\n",
                        socket_id, rte_strerror(rte_errno));
        } else if (verbose_level > 0) {
                rte_mempool_dump(stdout, rte_mp);
        }
        return rte_mp;
}

/*
 * Check whether the given socket id is valid in NUMA mode.
 * Return 0 if valid, -1 otherwise.
 */
static int
check_socket_id(const unsigned int socket_id)
{
        static int warning_once = 0;

        if (new_socket_id(socket_id)) {
                if (!warning_once && numa_support)
                        fprintf(stderr,
                                "Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
                warning_once = 1;
                return -1;
        }
        return 0;
}

/*
 * Get the allowed maximum number of RX queues.
 * *pid returns the port id which has the minimal value of
 * max_rx_queues among all ports.
 */
queueid_t
get_allowed_max_nb_rxq(portid_t *pid)
{
        queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
        bool max_rxq_valid = false;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                max_rxq_valid = true;
                if (dev_info.max_rx_queues < allowed_max_rxq) {
                        allowed_max_rxq = dev_info.max_rx_queues;
                        *pid = pi;
                }
        }
        return max_rxq_valid ? allowed_max_rxq : 0;
}

/*
 * Check whether the input rxq is valid.
 * It is valid if it does not exceed the maximum number of RX queues
 * of any port. Return 0 if valid, -1 otherwise.
 */
int
check_nb_rxq(queueid_t rxq)
{
        queueid_t allowed_max_rxq;
        portid_t pid = 0;

        allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
        if (rxq > allowed_max_rxq) {
                fprintf(stderr,
                        "Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
                        rxq, allowed_max_rxq, pid);
                return -1;
        }
        return 0;
}

/*
 * Get the allowed maximum number of TX queues.
 * *pid returns the port id which has the minimal value of
 * max_tx_queues among all ports.
 */
queueid_t
get_allowed_max_nb_txq(portid_t *pid)
{
        queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
        bool max_txq_valid = false;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                max_txq_valid = true;
                if (dev_info.max_tx_queues < allowed_max_txq) {
                        allowed_max_txq = dev_info.max_tx_queues;
                        *pid = pi;
                }
        }
        return max_txq_valid ? allowed_max_txq : 0;
}

/*
 * Check whether the input txq is valid.
 * It is valid if it does not exceed the maximum number of TX queues
 * of any port. Return 0 if valid, -1 otherwise.
 */
int
check_nb_txq(queueid_t txq)
{
        queueid_t allowed_max_txq;
        portid_t pid = 0;

        allowed_max_txq = get_allowed_max_nb_txq(&pid);
        if (txq > allowed_max_txq) {
                fprintf(stderr,
                        "Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
                        txq, allowed_max_txq, pid);
                return -1;
        }
        return 0;
}

/*
 * Get the allowed maximum number of RXDs for every RX queue.
 * *pid returns the port id which has the minimal value of
 * max_rxd across all queues of all ports.
 */
static uint16_t
get_allowed_max_nb_rxd(portid_t *pid)
{
        uint16_t allowed_max_rxd = UINT16_MAX;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
                        allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
                        *pid = pi;
                }
        }
        return allowed_max_rxd;
}

/*
 * Get the allowed minimal number of RXDs for every RX queue.
 * *pid returns the port id which has the maximal value of
 * min_rxd across all queues of all ports.
 */
static uint16_t
get_allowed_min_nb_rxd(portid_t *pid)
{
        uint16_t allowed_min_rxd = 0;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
                        allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
                        *pid = pi;
                }
        }

        return allowed_min_rxd;
}

/*
 * Check whether the input rxd is valid.
 * It is valid if it does not exceed the maximum number of RXDs of any
 * Rx queue and is not less than the minimal number of RXDs of any
 * Rx queue. Return 0 if valid, -1 otherwise.
 */
int
check_nb_rxd(queueid_t rxd)
{
        uint16_t allowed_max_rxd;
        uint16_t allowed_min_rxd;
        portid_t pid = 0;

        allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
        if (rxd > allowed_max_rxd) {
                fprintf(stderr,
                        "Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
                        rxd, allowed_max_rxd, pid);
                return -1;
        }

        allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
        if (rxd < allowed_min_rxd) {
                fprintf(stderr,
                        "Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
                        rxd, allowed_min_rxd, pid);
                return -1;
        }

        return 0;
}

/*
 * Get the allowed maximum number of TXDs for every TX queue.
 * *pid returns the port id which has the minimal value of
 * max_txd across all queues of all ports.
 */
static uint16_t
get_allowed_max_nb_txd(portid_t *pid)
{
        uint16_t allowed_max_txd = UINT16_MAX;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
                        allowed_max_txd = dev_info.tx_desc_lim.nb_max;
                        *pid = pi;
                }
        }
        return allowed_max_txd;
}

/*
 * Get the allowed minimal number of TXDs for every TX queue.
 * *pid returns the port id which has the maximal value of
 * min_txd across all queues of all ports.
 */
static uint16_t
get_allowed_min_nb_txd(portid_t *pid)
{
        uint16_t allowed_min_txd = 0;
        portid_t pi;
        struct rte_eth_dev_info dev_info;

        RTE_ETH_FOREACH_DEV(pi) {
                if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
                        continue;

                if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
                        allowed_min_txd = dev_info.tx_desc_lim.nb_min;
                        *pid = pi;
                }
        }

        return allowed_min_txd;
}

/*
 * Check whether the input txd is valid.
 * It is valid if it does not exceed the maximum number of TXDs of any
 * Tx queue and is not less than the minimal number of TXDs of any
 * Tx queue. Return 0 if valid, -1 otherwise.
 */
int
check_nb_txd(queueid_t txd)
{
        uint16_t allowed_max_txd;
        uint16_t allowed_min_txd;
        portid_t pid = 0;

        allowed_max_txd = get_allowed_max_nb_txd(&pid);
        if (txd > allowed_max_txd) {
                fprintf(stderr,
                        "Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
                        txd, allowed_max_txd, pid);
                return -1;
        }

        allowed_min_txd = get_allowed_min_nb_txd(&pid);
        if (txd < allowed_min_txd) {
                fprintf(stderr,
                        "Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
                        txd, allowed_min_txd, pid);
                return -1;
        }
        return 0;
}


/*
 * Get the allowed maximum number of hairpin queues.
 * *pid returns the port id which has the minimal value of
 * max_hairpin_queues among all ports.
 */
queueid_t
get_allowed_max_nb_hairpinq(portid_t *pid)
{
        queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
        portid_t pi;
        struct rte_eth_hairpin_cap cap;

        RTE_ETH_FOREACH_DEV(pi) {
                if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
                        *pid = pi;
                        return 0;
                }
                if (cap.max_nb_queues < allowed_max_hairpinq) {
                        allowed_max_hairpinq = cap.max_nb_queues;
                        *pid = pi;
                }
        }
        return allowed_max_hairpinq;
}

/*
 * Check whether the input number of hairpin queues is valid.
 * It is valid if it does not exceed the maximum number of hairpin
 * queues of any port. Return 0 if valid, -1 otherwise.
 */
int
check_nb_hairpinq(queueid_t hairpinq)
{
        queueid_t allowed_max_hairpinq;
        portid_t pid = 0;

        allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
        if (hairpinq > allowed_max_hairpinq) {
                fprintf(stderr,
                        "Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
                        hairpinq, allowed_max_hairpinq, pid);
                return -1;
        }
        return 0;
}

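/*
 * Derive the L2 overhead (header plus CRC) of a port: use the gap between
 * max_rx_pktlen and max_mtu when the PMD reports both, otherwise fall
 * back to the standard Ethernet overhead.
 */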
static int
get_eth_overhead(struct rte_eth_dev_info *dev_info)
{
        uint32_t eth_overhead;

        if (dev_info->max_mtu != UINT16_MAX &&
            dev_info->max_rx_pktlen > dev_info->max_mtu)
                eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
        else
                eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;

        return eth_overhead;
}

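/*
 * Apply the default Tx/Rx configuration to a port: negotiate Rx metadata,
 * pick the flow transfer proxy, propagate the global offload
 * configuration to every queue and, if needed, grow the first mbuf
 * segment so that an MTU-sized packet fits in nb_mtu_seg_max segments.
 */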
1563 static void
1564 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1565 {
1566         struct rte_port *port = &ports[pid];
1567         int ret;
1568         int i;
1569
1570         eth_rx_metadata_negotiate_mp(pid);
1571         flow_pick_transfer_proxy_mp(pid);
1572
1573         port->dev_conf.txmode = tx_mode;
1574         port->dev_conf.rxmode = rx_mode;
1575
1576         ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1577         if (ret != 0)
1578                 rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1579
1580         if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1581                 port->dev_conf.txmode.offloads &=
1582                         ~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1583
1584         /* Apply Rx offloads configuration */
1585         for (i = 0; i < port->dev_info.max_rx_queues; i++)
1586                 port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1587         /* Apply Tx offloads configuration */
1588         for (i = 0; i < port->dev_info.max_tx_queues; i++)
1589                 port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1590
1591         if (eth_link_speed)
1592                 port->dev_conf.link_speeds = eth_link_speed;
1593
1594         if (max_rx_pkt_len)
1595                 port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1596                         get_eth_overhead(&port->dev_info);
1597
1598         /* set flag to initialize port/queue */
1599         port->need_reconfig = 1;
1600         port->need_reconfig_queues = 1;
1601         port->socket_id = socket_id;
1602         port->tx_metadata = 0;
1603
1604         /*
1605          * Check for maximum number of segments per MTU.
1606          * Accordingly update the mbuf data size.
1607          */
1608         if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1609             port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1610                 uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1611                 uint16_t mtu;
1612
1613                 if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1614                         uint16_t data_size = (mtu + eth_overhead) /
1615                                 port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1616                         uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1617
1618                         if (buffer_size > mbuf_data_size[0]) {
1619                                 mbuf_data_size[0] = buffer_size;
1620                                 TESTPMD_LOG(WARNING,
1621                                         "Configured mbuf size of the first segment to %hu\n",
1622                                         mbuf_data_size[0]);
1623                         }
1624                 }
1625         }
1626 }
1627
1628 static void
1629 init_config(void)
1630 {
1631         portid_t pid;
1632         struct rte_mempool *mbp;
1633         unsigned int nb_mbuf_per_pool;
1634         lcoreid_t  lc_id;
1635         struct rte_gro_param gro_param;
1636         uint32_t gso_types;
1637
1638         /* Configuration of logical cores. */
1639         fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1640                                 sizeof(struct fwd_lcore *) * nb_lcores,
1641                                 RTE_CACHE_LINE_SIZE);
1642         if (fwd_lcores == NULL) {
1643                 rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1644                                                         "failed\n", nb_lcores);
1645         }
1646         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1647                 fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1648                                                sizeof(struct fwd_lcore),
1649                                                RTE_CACHE_LINE_SIZE);
1650                 if (fwd_lcores[lc_id] == NULL) {
1651                         rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1652                                                                 "failed\n");
1653                 }
1654                 fwd_lcores[lc_id]->cpuid_idx = lc_id;
1655         }
1656
1657         RTE_ETH_FOREACH_DEV(pid) {
1658                 uint32_t socket_id;
1659
1660                 if (numa_support) {
1661                         socket_id = port_numa[pid];
1662                         if (port_numa[pid] == NUMA_NO_CONFIG) {
1663                                 socket_id = rte_eth_dev_socket_id(pid);
1664
1665                                 /*
1666                                  * if socket_id is invalid,
1667                                  * set to the first available socket.
1668                                  */
1669                                 if (check_socket_id(socket_id) < 0)
1670                                         socket_id = socket_ids[0];
1671                         }
1672                 } else {
1673                         socket_id = (socket_num == UMA_NO_CONFIG) ?
1674                                     0 : socket_num;
1675                 }
1676                 /* Apply default TxRx configuration for all ports */
1677                 init_config_port_offloads(pid, socket_id);
1678         }
1679         /*
1680          * Create the mbuf pools.
1681          * If NUMA support is disabled, create a single pool in socket 0
1682          * memory by default.
1683          * Otherwise, create one pool per detected socket.
1684          *
1685          * Size the pools for the maximum values of nb_rxd and nb_txd, so
1686          * that nb_rxd and nb_txd can still be changed at run time.
1687          */
1688         if (param_total_num_mbufs)
1689                 nb_mbuf_per_pool = param_total_num_mbufs;
1690         else {
1691                 nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1692                         (nb_lcores * mb_mempool_cache) +
1693                         RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1694                 nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1695         }
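        /*
         * A sketch of the default sizing above, assuming the usual
         * testpmd build-time values (RTE_TEST_RX_DESC_MAX and
         * RTE_TEST_TX_DESC_MAX of 2048, MAX_PKT_BURST of 512 -- values
         * assumed here, check testpmd.h): each pool then holds enough
         * mbufs to fill every port's Rx and Tx rings plus the per-lcore
         * mempool caches and one in-flight burst.
         */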
1696
1697         if (numa_support) {
1698                 uint8_t i, j;
1699
1700                 for (i = 0; i < num_sockets; i++)
1701                         for (j = 0; j < mbuf_data_size_n; j++)
1702                                 mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1703                                         mbuf_pool_create(mbuf_data_size[j],
1704                                                           nb_mbuf_per_pool,
1705                                                           socket_ids[i], j);
1706         } else {
1707                 uint8_t i;
1708
1709                 for (i = 0; i < mbuf_data_size_n; i++)
1710                         mempools[i] = mbuf_pool_create
1711                                         (mbuf_data_size[i],
1712                                          nb_mbuf_per_pool,
1713                                          socket_num == UMA_NO_CONFIG ?
1714                                          0 : socket_num, i);
1715         }
1716
1717         init_port_config();
1718
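        /* Tx offload types eligible for software GSO in the forwarding engines. */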
1719         gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1720                 DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1721         /*
1722          * Record which mbuf pool each logical core should use, if needed.
1723          */
1724         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1725                 mbp = mbuf_pool_find(
1726                         rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1727
1728                 if (mbp == NULL)
1729                         mbp = mbuf_pool_find(0, 0);
1730                 fwd_lcores[lc_id]->mbp = mbp;
1731                 /* initialize GSO context */
1732                 fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1733                 fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1734                 fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1735                 fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1736                         RTE_ETHER_CRC_LEN;
1737                 fwd_lcores[lc_id]->gso_ctx.flag = 0;
1738         }
1739
1740         fwd_config_setup();
1741
1742         /* create a gro context for each lcore */
1743         gro_param.gro_types = RTE_GRO_TCP_IPV4;
1744         gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1745         gro_param.max_item_per_flow = MAX_PKT_BURST;
1746         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1747                 gro_param.socket_id = rte_lcore_to_socket_id(
1748                                 fwd_lcores_cpuids[lc_id]);
1749                 fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1750                 if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1751                         rte_exit(EXIT_FAILURE,
1752                                         "rte_gro_ctx_create() failed\n");
1753                 }
1754         }
1755 }
1756
1757
1758 void
1759 reconfig(portid_t new_port_id, unsigned socket_id)
1760 {
1761         /* Reconfiguration of Ethernet ports. */
1762         init_config_port_offloads(new_port_id, socket_id);
1763         init_port_config();
1764 }
1765
1766
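/*
 * Allocate nb_ports * max(nb_rxq, nb_txq) forwarding streams: validate
 * the requested queue counts against every port's limits, resolve each
 * port's NUMA socket, and reallocate the fwd_streams array whenever the
 * total stream count changes. Returns 0 on success, -1 on error.
 */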
1767 int
1768 init_fwd_streams(void)
1769 {
1770         portid_t pid;
1771         struct rte_port *port;
1772         streamid_t sm_id, nb_fwd_streams_new;
1773         queueid_t q;
1774
1775         /* set socket id according to numa or not */
1776         RTE_ETH_FOREACH_DEV(pid) {
1777                 port = &ports[pid];
1778                 if (nb_rxq > port->dev_info.max_rx_queues) {
1779                         fprintf(stderr,
1780                                 "Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1781                                 nb_rxq, port->dev_info.max_rx_queues);
1782                         return -1;
1783                 }
1784                 if (nb_txq > port->dev_info.max_tx_queues) {
1785                         fprintf(stderr,
1786                                 "Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1787                                 nb_txq, port->dev_info.max_tx_queues);
1788                         return -1;
1789                 }
1790                 if (numa_support) {
1791                         if (port_numa[pid] != NUMA_NO_CONFIG)
1792                                 port->socket_id = port_numa[pid];
1793                         else {
1794                                 port->socket_id = rte_eth_dev_socket_id(pid);
1795
1796                                 /*
1797                                  * if socket_id is invalid,
1798                                  * set to the first available socket.
1799                                  */
1800                                 if (check_socket_id(port->socket_id) < 0)
1801                                         port->socket_id = socket_ids[0];
1802                         }
1803                 }
1804                 else {
1805                         if (socket_num == UMA_NO_CONFIG)
1806                                 port->socket_id = 0;
1807                         else
1808                                 port->socket_id = socket_num;
1809                 }
1810         }
1811
1812         q = RTE_MAX(nb_rxq, nb_txq);
1813         if (q == 0) {
1814                 fprintf(stderr,
1815                         "Fail: Cannot allocate fwd streams as number of queues is 0\n");
1816                 return -1;
1817         }
1818         nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1819         if (nb_fwd_streams_new == nb_fwd_streams)
1820                 return 0;
1821         /* clear the old */
1822         if (fwd_streams != NULL) {
1823                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1824                         if (fwd_streams[sm_id] == NULL)
1825                                 continue;
1826                         rte_free(fwd_streams[sm_id]);
1827                         fwd_streams[sm_id] = NULL;
1828                 }
1829                 rte_free(fwd_streams);
1830                 fwd_streams = NULL;
1831         }
1832
1833         /* init new */
1834         nb_fwd_streams = nb_fwd_streams_new;
1835         if (nb_fwd_streams) {
1836                 fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1837                         sizeof(struct fwd_stream *) * nb_fwd_streams,
1838                         RTE_CACHE_LINE_SIZE);
1839                 if (fwd_streams == NULL)
1840                         rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1841                                  " (struct fwd_stream *)) failed\n",
1842                                  nb_fwd_streams);
1843
1844                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1845                         fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1846                                 " struct fwd_stream", sizeof(struct fwd_stream),
1847                                 RTE_CACHE_LINE_SIZE);
1848                         if (fwd_streams[sm_id] == NULL)
1849                                 rte_exit(EXIT_FAILURE, "rte_zmalloc"
1850                                          "(struct fwd_stream) failed\n");
1851                 }
1852         }
1853
1854         return 0;
1855 }
1856
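/*
 * Print the burst-size spread for one direction: the share of empty
 * (size 0) bursts plus the two most frequent non-zero burst sizes,
 * folding whatever remains into an "other" bucket.
 */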
1857 static void
1858 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1859 {
1860         uint64_t total_burst, sburst;
1861         uint64_t nb_burst;
1862         uint64_t burst_stats[4];
1863         uint16_t pktnb_stats[4];
1864         uint16_t nb_pkt;
1865         int burst_percent[4], sburstp;
1866         int i;
1867
1868         /*
1869          * First compute the total number of packet bursts and find the
1870          * two burst sizes with the highest occurrence counts.
1871          */
1872         memset(&burst_stats, 0x0, sizeof(burst_stats));
1873         memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1874
1875         /* Always show the stats for burst size 0 */
1876         total_burst = pbs->pkt_burst_spread[0];
1877         burst_stats[0] = pbs->pkt_burst_spread[0];
1878         pktnb_stats[0] = 0;
1879
1880         /* Find the next 2 burst sizes with highest occurrences. */
1881         for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1882                 nb_burst = pbs->pkt_burst_spread[nb_pkt];
1883
1884                 if (nb_burst == 0)
1885                         continue;
1886
1887                 total_burst += nb_burst;
1888
1889                 if (nb_burst > burst_stats[1]) {
1890                         burst_stats[2] = burst_stats[1];
1891                         pktnb_stats[2] = pktnb_stats[1];
1892                         burst_stats[1] = nb_burst;
1893                         pktnb_stats[1] = nb_pkt;
1894                 } else if (nb_burst > burst_stats[2]) {
1895                         burst_stats[2] = nb_burst;
1896                         pktnb_stats[2] = nb_pkt;
1897                 }
1898         }
1899         if (total_burst == 0)
1900                 return;
1901
1902         printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1903         for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1904                 if (i == 3) {
1905                         printf("%d%% of other]\n", 100 - sburstp);
1906                         return;
1907                 }
1908
1909                 sburst += burst_stats[i];
1910                 if (sburst == total_burst) {
1911                         printf("%d%% of %d pkts]\n",
1912                                 100 - sburstp, (int) pktnb_stats[i]);
1913                         return;
1914                 }
1915
1916                 burst_percent[i] =
1917                         (double)burst_stats[i] / total_burst * 100;
1918                 printf("%d%% of %d pkts + ",
1919                         burst_percent[i], (int) pktnb_stats[i]);
1920                 sburstp += burst_percent[i];
1921         }
1922 }
1923
1924 static void
1925 fwd_stream_stats_display(streamid_t stream_id)
1926 {
1927         struct fwd_stream *fs;
1928         static const char *fwd_top_stats_border = "-------";
1929
1930         fs = fwd_streams[stream_id];
1931         if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1932             (fs->fwd_dropped == 0))
1933                 return;
1934         printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1935                "TX Port=%2d/Queue=%2d %s\n",
1936                fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1937                fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1938         printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1939                " TX-dropped: %-14"PRIu64,
1940                fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1941
1942         /* extra Rx error counters when running the checksum engine */
1943         if (cur_fwd_eng == &csum_fwd_engine) {
1944                 printf("  RX- bad IP checksum: %-14"PRIu64
1945                        "  RX- bad L4 checksum: %-14"PRIu64
1946                        " RX- bad outer L4 checksum: %-14"PRIu64"\n",
1947                         fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1948                         fs->rx_bad_outer_l4_csum);
1949                 printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1950                         fs->rx_bad_outer_ip_csum);
1951         } else {
1952                 printf("\n");
1953         }
1954
1955         if (record_burst_stats) {
1956                 pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1957                 pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1958         }
1959 }
1960
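/*
 * Display the forwarding statistics accumulated since the last reset:
 * per-stream stats when there are more streams than ports, per-port
 * deltas against the snapshot taken by fwd_stats_reset(), grand totals,
 * and optionally CPU cycles per packet.
 */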
1961 void
1962 fwd_stats_display(void)
1963 {
1964         static const char *fwd_stats_border = "----------------------";
1965         static const char *acc_stats_border = "+++++++++++++++";
1966         struct {
1967                 struct fwd_stream *rx_stream;
1968                 struct fwd_stream *tx_stream;
1969                 uint64_t tx_dropped;
1970                 uint64_t rx_bad_ip_csum;
1971                 uint64_t rx_bad_l4_csum;
1972                 uint64_t rx_bad_outer_l4_csum;
1973                 uint64_t rx_bad_outer_ip_csum;
1974         } ports_stats[RTE_MAX_ETHPORTS];
1975         uint64_t total_rx_dropped = 0;
1976         uint64_t total_tx_dropped = 0;
1977         uint64_t total_rx_nombuf = 0;
1978         struct rte_eth_stats stats;
1979         uint64_t fwd_cycles = 0;
1980         uint64_t total_recv = 0;
1981         uint64_t total_xmit = 0;
1982         struct rte_port *port;
1983         streamid_t sm_id;
1984         portid_t pt_id;
1985         int i;
1986
1987         memset(ports_stats, 0, sizeof(ports_stats));
1988
1989         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1990                 struct fwd_stream *fs = fwd_streams[sm_id];
1991
1992                 if (cur_fwd_config.nb_fwd_streams >
1993                     cur_fwd_config.nb_fwd_ports) {
1994                         fwd_stream_stats_display(sm_id);
1995                 } else {
1996                         ports_stats[fs->tx_port].tx_stream = fs;
1997                         ports_stats[fs->rx_port].rx_stream = fs;
1998                 }
1999
2000                 ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2001
2002                 ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2003                 ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2004                 ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2005                                 fs->rx_bad_outer_l4_csum;
2006                 ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2007                                 fs->rx_bad_outer_ip_csum;
2008
2009                 if (record_core_cycles)
2010                         fwd_cycles += fs->core_cycles;
2011         }
2012         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2013                 pt_id = fwd_ports_ids[i];
2014                 port = &ports[pt_id];
2015
2016                 rte_eth_stats_get(pt_id, &stats);
2017                 stats.ipackets -= port->stats.ipackets;
2018                 stats.opackets -= port->stats.opackets;
2019                 stats.ibytes -= port->stats.ibytes;
2020                 stats.obytes -= port->stats.obytes;
2021                 stats.imissed -= port->stats.imissed;
2022                 stats.oerrors -= port->stats.oerrors;
2023                 stats.rx_nombuf -= port->stats.rx_nombuf;
2024
2025                 total_recv += stats.ipackets;
2026                 total_xmit += stats.opackets;
2027                 total_rx_dropped += stats.imissed;
2028                 total_tx_dropped += ports_stats[pt_id].tx_dropped;
2029                 total_tx_dropped += stats.oerrors;
2030                 total_rx_nombuf  += stats.rx_nombuf;
2031
2032                 printf("\n  %s Forward statistics for port %-2d %s\n",
2033                        fwd_stats_border, pt_id, fwd_stats_border);
2034
2035                 printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2036                        "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2037                        stats.ipackets + stats.imissed);
2038
2039                 if (cur_fwd_eng == &csum_fwd_engine) {
2040                         printf("  Bad-ipcsum: %-14"PRIu64
2041                                " Bad-l4csum: %-14"PRIu64
2042                                "Bad-outer-l4csum: %-14"PRIu64"\n",
2043                                ports_stats[pt_id].rx_bad_ip_csum,
2044                                ports_stats[pt_id].rx_bad_l4_csum,
2045                                ports_stats[pt_id].rx_bad_outer_l4_csum);
2046                         printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2047                                ports_stats[pt_id].rx_bad_outer_ip_csum);
2048                 }
2049                 if (stats.ierrors + stats.rx_nombuf > 0) {
2050                         printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2051                         printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2052                 }
2053
2054                 printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2055                        "TX-total: %-"PRIu64"\n",
2056                        stats.opackets, ports_stats[pt_id].tx_dropped,
2057                        stats.opackets + ports_stats[pt_id].tx_dropped);
2058
2059                 if (record_burst_stats) {
2060                         if (ports_stats[pt_id].rx_stream)
2061                                 pkt_burst_stats_display("RX",
2062                                         &ports_stats[pt_id].rx_stream->rx_burst_stats);
2063                         if (ports_stats[pt_id].tx_stream)
2064                                 pkt_burst_stats_display("TX",
2065                                 &ports_stats[pt_id].tx_stream->tx_burst_stats);
2066                 }
2067
2068                 printf("  %s--------------------------------%s\n",
2069                        fwd_stats_border, fwd_stats_border);
2070         }
2071
2072         printf("\n  %s Accumulated forward statistics for all ports "
2073                "%s\n",
2074                acc_stats_border, acc_stats_border);
2075         printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2076                "%-"PRIu64"\n"
2077                "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2078                "%-"PRIu64"\n",
2079                total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2080                total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2081         if (total_rx_nombuf > 0)
2082                 printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2083         printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2084                "%s\n",
2085                acc_stats_border, acc_stats_border);
2086         if (record_core_cycles) {
2087 #define CYC_PER_MHZ 1E6
2088                 if (total_recv > 0 || total_xmit > 0) {
2089                         uint64_t total_pkts = 0;
2090                         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2091                             strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2092                                 total_pkts = total_xmit;
2093                         else
2094                                 total_pkts = total_recv;
2095
2096                         printf("\n  CPU cycles/packet=%.2F (total cycles="
2097                                "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2098                                " MHz Clock\n",
2099                                (double) fwd_cycles / total_pkts,
2100                                fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2101                                (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2102                 }
2103         }
2104 }
2105
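/*
 * Snapshot the current hardware counters as the new baseline and clear
 * all per-stream software counters, so the next fwd_stats_display()
 * only reports traffic from the upcoming run.
 */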
2106 void
2107 fwd_stats_reset(void)
2108 {
2109         streamid_t sm_id;
2110         portid_t pt_id;
2111         int i;
2112
2113         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2114                 pt_id = fwd_ports_ids[i];
2115                 rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2116         }
2117         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2118                 struct fwd_stream *fs = fwd_streams[sm_id];
2119
2120                 fs->rx_packets = 0;
2121                 fs->tx_packets = 0;
2122                 fs->fwd_dropped = 0;
2123                 fs->rx_bad_ip_csum = 0;
2124                 fs->rx_bad_l4_csum = 0;
2125                 fs->rx_bad_outer_l4_csum = 0;
2126                 fs->rx_bad_outer_ip_csum = 0;
2127
2128                 memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2129                 memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2130                 fs->core_cycles = 0;
2131         }
2132 }
2133
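/*
 * Drain any stale packets sitting in the Rx queues of the forwarding
 * ports so that a new run starts from a clean state. Two passes are
 * made, with a short pause in between, and each queue drain is bounded
 * by a one second timer.
 */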
2134 static void
2135 flush_fwd_rx_queues(void)
2136 {
2137         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2138         portid_t  rxp;
2139         portid_t port_id;
2140         queueid_t rxq;
2141         uint16_t  nb_rx;
2142         uint16_t  i;
2143         uint8_t   j;
2144         uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2145         uint64_t timer_period;
2146
2147         if (num_procs > 1) {
2148                 printf("multi-process does not support flushing the forwarding Rx queues, skipping.\n");
2149                 return;
2150         }
2151
2152         /* convert to number of cycles */
2153         timer_period = rte_get_timer_hz(); /* 1 second timeout */
2154
2155         for (j = 0; j < 2; j++) {
2156                 for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2157                         for (rxq = 0; rxq < nb_rxq; rxq++) {
2158                                 port_id = fwd_ports_ids[rxp];
2159                                 /*
2160                                  * testpmd can get stuck in the do-while loop
2161                                  * below if rte_eth_rx_burst() always returns
2162                                  * packets, so a timer is used to exit the
2163                                  * loop after the 1 second period expires.
2164                                  */
2165                                 prev_tsc = rte_rdtsc();
2166                                 do {
2167                                         nb_rx = rte_eth_rx_burst(port_id, rxq,
2168                                                 pkts_burst, MAX_PKT_BURST);
2169                                         for (i = 0; i < nb_rx; i++)
2170                                                 rte_pktmbuf_free(pkts_burst[i]);
2171
2172                                         cur_tsc = rte_rdtsc();
2173                                         diff_tsc = cur_tsc - prev_tsc;
2174                                         timer_tsc += diff_tsc;
2175                                 } while ((nb_rx > 0) &&
2176                                         (timer_tsc < timer_period));
2177                                 timer_tsc = 0;
2178                         }
2179                 }
2180                 rte_delay_ms(10); /* wait 10 milliseconds before the next pass */
2181         }
2182 }
2183
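/*
 * Per-lcore forwarding loop: run the engine's packet_fwd callback over
 * every stream owned by this lcore until the lcore is told to stop,
 * updating the bitrate and latency statistics on the designated lcore
 * when those libraries are compiled in.
 */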
2184 static void
2185 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2186 {
2187         struct fwd_stream **fsm;
2188         streamid_t nb_fs;
2189         streamid_t sm_id;
2190 #ifdef RTE_LIB_BITRATESTATS
2191         uint64_t tics_per_1sec;
2192         uint64_t tics_datum;
2193         uint64_t tics_current;
2194         uint16_t i, cnt_ports;
2195
2196         cnt_ports = nb_ports;
2197         tics_datum = rte_rdtsc();
2198         tics_per_1sec = rte_get_timer_hz();
2199 #endif
2200         fsm = &fwd_streams[fc->stream_idx];
2201         nb_fs = fc->stream_nb;
2202         do {
2203                 for (sm_id = 0; sm_id < nb_fs; sm_id++)
2204                         (*pkt_fwd)(fsm[sm_id]);
2205 #ifdef RTE_LIB_BITRATESTATS
2206                 if (bitrate_enabled != 0 &&
2207                                 bitrate_lcore_id == rte_lcore_id()) {
2208                         tics_current = rte_rdtsc();
2209                         if (tics_current - tics_datum >= tics_per_1sec) {
2210                                 /* Periodic bitrate calculation */
2211                                 for (i = 0; i < cnt_ports; i++)
2212                                         rte_stats_bitrate_calc(bitrate_data,
2213                                                 ports_ids[i]);
2214                                 tics_datum = tics_current;
2215                         }
2216                 }
2217 #endif
2218 #ifdef RTE_LIB_LATENCYSTATS
2219                 if (latencystats_enabled != 0 &&
2220                                 latencystats_lcore_id == rte_lcore_id())
2221                         rte_latencystats_update();
2222 #endif
2223
2224         } while (! fc->stopped);
2225 }
2226
2227 static int
2228 start_pkt_forward_on_core(void *fwd_arg)
2229 {
2230         run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2231                              cur_fwd_config.fwd_eng->packet_fwd);
2232         return 0;
2233 }
2234
2235 /*
2236  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2237  * Used to start communication flows in network loopback test configurations.
2238  */
2239 static int
2240 run_one_txonly_burst_on_core(void *fwd_arg)
2241 {
2242         struct fwd_lcore *fwd_lc;
2243         struct fwd_lcore tmp_lcore;
2244
2245         fwd_lc = (struct fwd_lcore *) fwd_arg;
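        /*
         * Work on a local copy with 'stopped' preset so the do-while
         * loop in run_pkt_fwd_on_lcore() executes exactly one iteration.
         */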
2246         tmp_lcore = *fwd_lc;
2247         tmp_lcore.stopped = 1;
2248         run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2249         return 0;
2250 }
2251
2252 /*
2253  * Launch packet forwarding:
2254  *     - Setup per-port forwarding context.
2255  *     - Launch logical cores with their forwarding configuration.
2256  */
2257 static void
2258 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2259 {
2260         unsigned int i;
2261         unsigned int lc_id;
2262         int diag;
2263
2264         for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2265                 lc_id = fwd_lcores_cpuids[i];
2266                 if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2267                         fwd_lcores[i]->stopped = 0;
2268                         diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2269                                                      fwd_lcores[i], lc_id);
2270                         if (diag != 0)
2271                                 fprintf(stderr,
2272                                         "launch lcore %u failed - diag=%d\n",
2273                                         lc_id, diag);
2274                 }
2275         }
2276 }
2277
2278 /*
2279  * Launch packet forwarding configuration.
2280  */
2281 void
2282 start_packet_forwarding(int with_tx_first)
2283 {
2284         port_fwd_begin_t port_fwd_begin;
2285         port_fwd_end_t  port_fwd_end;
2286         unsigned int i;
2287
2288         if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2289                 rte_exit(EXIT_FAILURE, "Rx queue count is 0; cannot use the rxonly fwd mode\n");
2290
2291         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2292                 rte_exit(EXIT_FAILURE, "Tx queue count is 0; cannot use the txonly fwd mode\n");
2293
2294         if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2295                 strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2296                 (!nb_rxq || !nb_txq))
2297                 rte_exit(EXIT_FAILURE,
2298                         "Either rxq or txq is 0; cannot use the %s fwd mode\n",
2299                         cur_fwd_eng->fwd_mode_name);
2300
2301         if (all_ports_started() == 0) {
2302                 fprintf(stderr, "Not all ports were started\n");
2303                 return;
2304         }
2305         if (test_done == 0) {
2306                 fprintf(stderr, "Packet forwarding already started\n");
2307                 return;
2308         }
2309
2310         fwd_config_setup();
2311
2312         pkt_fwd_config_display(&cur_fwd_config);
2313         if (!pkt_fwd_shared_rxq_check())
2314                 return;
2315
2316         port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2317         if (port_fwd_begin != NULL) {
2318                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2319                         if (port_fwd_begin(fwd_ports_ids[i])) {
2320                                 fprintf(stderr,
2321                                         "Packet forwarding is not ready\n");
2322                                 return;
2323                         }
2324                 }
2325         }
2326
2327         if (with_tx_first) {
2328                 port_fwd_begin = tx_only_engine.port_fwd_begin;
2329                 if (port_fwd_begin != NULL) {
2330                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2331                                 if (port_fwd_begin(fwd_ports_ids[i])) {
2332                                         fprintf(stderr,
2333                                                 "Packet forwarding is not ready\n");
2334                                         return;
2335                                 }
2336                         }
2337                 }
2338         }
2339
2340         test_done = 0;
2341
2342         if (!no_flush_rx)
2343                 flush_fwd_rx_queues();
2344
2345         rxtx_config_display();
2346
2347         fwd_stats_reset();
2348         if (with_tx_first) {
2349                 while (with_tx_first--) {
2350                         launch_packet_forwarding(
2351                                         run_one_txonly_burst_on_core);
2352                         rte_eal_mp_wait_lcore();
2353                 }
2354                 port_fwd_end = tx_only_engine.port_fwd_end;
2355                 if (port_fwd_end != NULL) {
2356                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2357                                 (*port_fwd_end)(fwd_ports_ids[i]);
2358                 }
2359         }
2360         launch_packet_forwarding(start_pkt_forward_on_core);
2361 }
2362
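/*
 * Stop packet forwarding: signal every forwarding lcore to stop, wait
 * for them to finish, run the engine's per-port teardown callback, and
 * display the accumulated statistics.
 */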
2363 void
2364 stop_packet_forwarding(void)
2365 {
2366         port_fwd_end_t port_fwd_end;
2367         lcoreid_t lc_id;
2368         portid_t pt_id;
2369         int i;
2370
2371         if (test_done) {
2372                 fprintf(stderr, "Packet forwarding not started\n");
2373                 return;
2374         }
2375         printf("Telling cores to stop...");
2376         for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2377                 fwd_lcores[lc_id]->stopped = 1;
2378         printf("\nWaiting for lcores to finish...\n");
2379         rte_eal_mp_wait_lcore();
2380         port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2381         if (port_fwd_end != NULL) {
2382                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2383                         pt_id = fwd_ports_ids[i];
2384                         (*port_fwd_end)(pt_id);
2385                 }
2386         }
2387
2388         fwd_stats_display();
2389
2390         printf("\nDone.\n");
2391         test_done = 1;
2392 }
2393
2394 void
2395 dev_set_link_up(portid_t pid)
2396 {
2397         if (rte_eth_dev_set_link_up(pid) < 0)
2398                 fprintf(stderr, "\nSet link up fail.\n");
2399 }
2400
2401 void
2402 dev_set_link_down(portid_t pid)
2403 {
2404         if (rte_eth_dev_set_link_down(pid) < 0)
2405                 fprintf(stderr, "\nSet link down fail.\n");
2406 }
2407
2408 static int
2409 all_ports_started(void)
2410 {
2411         portid_t pi;
2412         struct rte_port *port;
2413
2414         RTE_ETH_FOREACH_DEV(pi) {
2415                 port = &ports[pi];
2416                 /* Check if there is a port which is not started */
2417                 if ((port->port_status != RTE_PORT_STARTED) &&
2418                         (port->slave_flag == 0))
2419                         return 0;
2420         }
2421
2422         /* All active ports are started */
2423         return 1;
2424 }
2425
2426 int
2427 port_is_stopped(portid_t port_id)
2428 {
2429         struct rte_port *port = &ports[port_id];
2430
2431         if ((port->port_status != RTE_PORT_STOPPED) &&
2432             (port->slave_flag == 0))
2433                 return 0;
2434         return 1;
2435 }
2436
2437 int
2438 all_ports_stopped(void)
2439 {
2440         portid_t pi;
2441
2442         RTE_ETH_FOREACH_DEV(pi) {
2443                 if (!port_is_stopped(pi))
2444                         return 0;
2445         }
2446
2447         return 1;
2448 }
2449
2450 int
2451 port_is_started(portid_t port_id)
2452 {
2453         if (port_id_is_invalid(port_id, ENABLED_WARN))
2454                 return 0;
2455
2456         if (ports[port_id].port_status != RTE_PORT_STARTED)
2457                 return 0;
2458
2459         return 1;
2460 }
2461
2462 /* Configure the Rx and Tx hairpin queues for the selected port. */
2463 static int
2464 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2465 {
2466         queueid_t qi;
2467         struct rte_eth_hairpin_conf hairpin_conf = {
2468                 .peer_count = 1,
2469         };
2470         int i;
2471         int diag;
2472         struct rte_port *port = &ports[pi];
2473         uint16_t peer_rx_port = pi;
2474         uint16_t peer_tx_port = pi;
2475         uint32_t manual = 1;
2476         uint32_t tx_exp = hairpin_mode & 0x10;
2477
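        /*
         * Peer selection below follows the hairpin_mode bits (e.g. as
         * set with the --hairpin-mode option): low nibble 0 hairpins a
         * port to itself, 0x1 chains the ports in a loop (Tx peer is the
         * next port, Rx peer the previous one), 0x2 binds ports in
         * pairs; all multi-port layouts use manual binding, and bit 0x10
         * requests explicit Tx flow mode.
         */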
2478         if (!(hairpin_mode & 0xf)) {
2479                 peer_rx_port = pi;
2480                 peer_tx_port = pi;
2481                 manual = 0;
2482         } else if (hairpin_mode & 0x1) {
2483                 peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2484                                                        RTE_ETH_DEV_NO_OWNER);
2485                 if (peer_tx_port >= RTE_MAX_ETHPORTS)
2486                         peer_tx_port = rte_eth_find_next_owned_by(0,
2487                                                 RTE_ETH_DEV_NO_OWNER);
2488                 if (p_pi != RTE_MAX_ETHPORTS) {
2489                         peer_rx_port = p_pi;
2490                 } else {
2491                         uint16_t next_pi;
2492
2493                         /* Last port will be the peer RX port of the first. */
2494                         RTE_ETH_FOREACH_DEV(next_pi)
2495                                 peer_rx_port = next_pi;
2496                 }
2497                 manual = 1;
2498         } else if (hairpin_mode & 0x2) {
2499                 if (cnt_pi & 0x1) {
2500                         peer_rx_port = p_pi;
2501                 } else {
2502                         peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2503                                                 RTE_ETH_DEV_NO_OWNER);
2504                         if (peer_rx_port >= RTE_MAX_ETHPORTS)
2505                                 peer_rx_port = pi;
2506                 }
2507                 peer_tx_port = peer_rx_port;
2508                 manual = 1;
2509         }
2510
2511         for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2512                 hairpin_conf.peers[0].port = peer_rx_port;
2513                 hairpin_conf.peers[0].queue = i + nb_rxq;
2514                 hairpin_conf.manual_bind = !!manual;
2515                 hairpin_conf.tx_explicit = !!tx_exp;
2516                 diag = rte_eth_tx_hairpin_queue_setup
2517                         (pi, qi, nb_txd, &hairpin_conf);
2518                 i++;
2519                 if (diag == 0)
2520                         continue;
2521
2522                 /* Failed to set up the Tx hairpin queue, return */
2523                 if (rte_atomic16_cmpset(&(port->port_status),
2524                                         RTE_PORT_HANDLING,
2525                                         RTE_PORT_STOPPED) == 0)
2526                         fprintf(stderr,
2527                                 "Port %d cannot be set back to stopped\n", pi);
2528                 fprintf(stderr, "Failed to configure port %d hairpin queues\n",
2529                         pi);
2530                 /* try to reconfigure queues next time */
2531                 port->need_reconfig_queues = 1;
2532                 return -1;
2533         }
2534         for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2535                 hairpin_conf.peers[0].port = peer_tx_port;
2536                 hairpin_conf.peers[0].queue = i + nb_txq;
2537                 hairpin_conf.manual_bind = !!manual;
2538                 hairpin_conf.tx_explicit = !!tx_exp;
2539                 diag = rte_eth_rx_hairpin_queue_setup
2540                         (pi, qi, nb_rxd, &hairpin_conf);
2541                 i++;
2542                 if (diag == 0)
2543                         continue;
2544
2545                 /* Failed to set up the Rx hairpin queue, return */
2546                 if (rte_atomic16_cmpset(&(port->port_status),
2547                                         RTE_PORT_HANDLING,
2548                                         RTE_PORT_STOPPED) == 0)
2549                         fprintf(stderr,
2550                                 "Port %d cannot be set back to stopped\n", pi);
2551                 fprintf(stderr, "Failed to configure port %d hairpin queues\n",
2552                         pi);
2553                 /* try to reconfigure queues next time */
2554                 port->need_reconfig_queues = 1;
2555                 return -1;
2556         }
2557         return 0;
2558 }
2559
2560 /* Configure an Rx queue, optionally splitting received packets across multiple mbuf segments (Rx buffer split). */
2561 int
2562 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2563                uint16_t nb_rx_desc, unsigned int socket_id,
2564                struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2565 {
2566         union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2567         unsigned int i, mp_n;
2568         int ret;
2569
2570         if (rx_pkt_nb_segs <= 1 ||
2571             (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2572                 rx_conf->rx_seg = NULL;
2573                 rx_conf->rx_nseg = 0;
2574                 ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2575                                              nb_rx_desc, socket_id,
2576                                              rx_conf, mp);
2577                 return ret;
2578         }
2579         for (i = 0; i < rx_pkt_nb_segs; i++) {
2580                 struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2581                 struct rte_mempool *mpx;
2582                 /*
2583                  * Use the last valid pool for any segment whose index
2584                  * exceeds the number of configured mempools.
2585                  */
2586                 mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2587                 mpx = mbuf_pool_find(socket_id, mp_n);
2588                 /* Handle zero as mbuf data buffer size. */
2589                 rx_seg->length = rx_pkt_seg_lengths[i] ?
2590                                    rx_pkt_seg_lengths[i] :
2591                                    mbuf_data_size[mp_n];
2592                 rx_seg->offset = i < rx_pkt_nb_offs ?
2593                                    rx_pkt_seg_offsets[i] : 0;
2594                 rx_seg->mp = mpx ? mpx : mp;
2595         }
2596         rx_conf->rx_nseg = rx_pkt_nb_segs;
2597         rx_conf->rx_seg = rx_useg;
2598         ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2599                                     socket_id, rx_conf, NULL);
2600         rx_conf->rx_seg = NULL;
2601         rx_conf->rx_nseg = 0;
2602         return ret;
2603 }
2604
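/*
 * Allocate the per-port arrays used to track the extended statistics
 * selected for display (e.g. via the --display-xstats option, assumed
 * here): supported xstat ids plus previous and current values.
 * Returns 0 on success or -ENOMEM on allocation failure.
 */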
2605 static int
2606 alloc_xstats_display_info(portid_t pi)
2607 {
2608         uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2609         uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2610         uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2611
2612         if (xstats_display_num == 0)
2613                 return 0;
2614
2615         *ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2616         if (*ids_supp == NULL)
2617                 goto fail_ids_supp;
2618
2619         *prev_values = calloc(xstats_display_num,
2620                               sizeof(**prev_values));
2621         if (*prev_values == NULL)
2622                 goto fail_prev_values;
2623
2624         *curr_values = calloc(xstats_display_num,
2625                               sizeof(**curr_values));
2626         if (*curr_values == NULL)
2627                 goto fail_curr_values;
2628
2629         ports[pi].xstats_info.allocated = true;
2630
2631         return 0;
2632
2633 fail_curr_values:
2634         free(*prev_values);
2635 fail_prev_values:
2636         free(*ids_supp);
2637 fail_ids_supp:
2638         return -ENOMEM;
2639 }
2640
2641 static void
2642 free_xstats_display_info(portid_t pi)
2643 {
2644         if (!ports[pi].xstats_info.allocated)
2645                 return;
2646         free(ports[pi].xstats_info.ids_supp);
2647         free(ports[pi].xstats_info.prev_values);
2648         free(ports[pi].xstats_info.curr_values);
2649         ports[pi].xstats_info.allocated = false;
2650 }
2651
2652 /** Fill helper structures for specified port to show extended statistics. */
2653 static void
2654 fill_xstats_display_info_for_port(portid_t pi)
2655 {
2656         unsigned int stat, stat_supp;
2657         const char *xstat_name;
2658         struct rte_port *port;
2659         uint64_t *ids_supp;
2660         int rc;
2661
2662         if (xstats_display_num == 0)
2663                 return;
2664
2665         if (pi == (portid_t)RTE_PORT_ALL) {
2666                 fill_xstats_display_info();
2667                 return;
2668         }
2669
2670         port = &ports[pi];
2671         if (port->port_status != RTE_PORT_STARTED)
2672                 return;
2673
2674         if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2675                 rte_exit(EXIT_FAILURE,
2676                          "Failed to allocate xstats display memory\n");
2677
2678         ids_supp = port->xstats_info.ids_supp;
2679         for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2680                 xstat_name = xstats_display[stat].name;
2681                 rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2682                                                    ids_supp + stat_supp);
2683                 if (rc != 0) {
2684                         fprintf(stderr, "No xstat '%s' (index %u) on port %u - skipping\n",
2685                                 xstat_name, stat, pi);
2686                         continue;
2687                 }
2688                 stat_supp++;
2689         }
2690
2691         port->xstats_info.ids_supp_sz = stat_supp;
2692 }
2693
2694 /** Fill helper structures for all ports to show extended statistics. */
2695 static void
2696 fill_xstats_display_info(void)
2697 {
2698         portid_t pi;
2699
2700         if (xstats_display_num == 0)
2701                 return;
2702
2703         RTE_ETH_FOREACH_DEV(pi)
2704                 fill_xstats_display_info_for_port(pi);
2705 }
2706
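/*
 * Start the given port, or every port when pid is RTE_PORT_ALL:
 * (re)configure the device and its Rx/Tx/hairpin queues if needed,
 * start it, and finally bind hairpin peers and check link status
 * where applicable. Returns 0 on success, -1 on failure.
 */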
2707 int
2708 start_port(portid_t pid)
2709 {
2710         int diag, need_check_link_status = -1;
2711         portid_t pi;
2712         portid_t p_pi = RTE_MAX_ETHPORTS;
2713         portid_t pl[RTE_MAX_ETHPORTS];
2714         portid_t peer_pl[RTE_MAX_ETHPORTS];
2715         uint16_t cnt_pi = 0;
2716         uint16_t cfg_pi = 0;
2717         int peer_pi;
2718         queueid_t qi;
2719         struct rte_port *port;
2720         struct rte_eth_hairpin_cap cap;
2721
2722         if (port_id_is_invalid(pid, ENABLED_WARN))
2723                 return 0;
2724
2725         RTE_ETH_FOREACH_DEV(pi) {
2726                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2727                         continue;
2728
2729                 need_check_link_status = 0;
2730                 port = &ports[pi];
2731                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2732                                                  RTE_PORT_HANDLING) == 0) {
2733                         fprintf(stderr, "Port %d is not stopped, skipping\n", pi);
2734                         continue;
2735                 }
2736
2737                 if (port->need_reconfig > 0) {
2738                         struct rte_eth_conf dev_conf;
2739                         int k;
2740
2741                         port->need_reconfig = 0;
2742
2743                         if (flow_isolate_all) {
2744                                 int ret = port_flow_isolate(pi, 1);
2745                                 if (ret) {
2746                                         fprintf(stderr,
2747                                                 "Failed to apply isolated mode on port %d\n",
2748                                                 pi);
2749                                         return -1;
2750                                 }
2751                         }
2752                         configure_rxtx_dump_callbacks(0);
2753                         printf("Configuring Port %d (socket %u)\n", pi,
2754                                         port->socket_id);
2755                         if (nb_hairpinq > 0 &&
2756                             rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2757                                 fprintf(stderr,
2758                                         "Port %d doesn't support hairpin queues\n",
2759                                         pi);
2760                                 return -1;
2761                         }
2762
2763                         /* configure port */
2764                         diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2765                                                      nb_txq + nb_hairpinq,
2766                                                      &(port->dev_conf));
2767                         if (diag != 0) {
2768                                 if (rte_atomic16_cmpset(&(port->port_status),
2769                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2770                                         fprintf(stderr,
2771                                                 "Port %d cannot be set back to stopped\n",
2772                                                 pi);
2773                                 fprintf(stderr, "Failed to configure port %d\n",
2774                                         pi);
2775                                 /* try to reconfigure port next time */
2776                                 port->need_reconfig = 1;
2777                                 return -1;
2778                         }
2779                         /* get the device configuration */
2780                         if (0 !=
2781                                 eth_dev_conf_get_print_err(pi, &dev_conf)) {
2782                                 fprintf(stderr,
2783                                         "port %d can not get device configuration\n",
2784                                         pi);
2785                                 return -1;
2786                         }
2787                         /* Apply Rx offloads configuration */
2788                         if (dev_conf.rxmode.offloads !=
2789                             port->dev_conf.rxmode.offloads) {
2790                                 port->dev_conf.rxmode.offloads |=
2791                                         dev_conf.rxmode.offloads;
2792                                 for (k = 0;
2793                                      k < port->dev_info.max_rx_queues;
2794                                      k++)
2795                                         port->rx_conf[k].offloads |=
2796                                                 dev_conf.rxmode.offloads;
2797                         }
2798                         /* Apply Tx offloads configuration */
2799                         if (dev_conf.txmode.offloads !=
2800                             port->dev_conf.txmode.offloads) {
2801                                 port->dev_conf.txmode.offloads |=
2802                                         dev_conf.txmode.offloads;
2803                                 for (k = 0;
2804                                      k < port->dev_info.max_tx_queues;
2805                                      k++)
2806                                         port->tx_conf[k].offloads |=
2807                                                 dev_conf.txmode.offloads;
2808                         }
2809                 }
2810                 if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2811                         port->need_reconfig_queues = 0;
2812                         /* setup tx queues */
2813                         for (qi = 0; qi < nb_txq; qi++) {
2814                                 if ((numa_support) &&
2815                                         (txring_numa[pi] != NUMA_NO_CONFIG))
2816                                         diag = rte_eth_tx_queue_setup(pi, qi,
2817                                                 port->nb_tx_desc[qi],
2818                                                 txring_numa[pi],
2819                                                 &(port->tx_conf[qi]));
2820                                 else
2821                                         diag = rte_eth_tx_queue_setup(pi, qi,
2822                                                 port->nb_tx_desc[qi],
2823                                                 port->socket_id,
2824                                                 &(port->tx_conf[qi]));
2825
2826                                 if (diag == 0)
2827                                         continue;
2828
2829                                 /* Failed to set up the Tx queue, return */
2830                                 if (rte_atomic16_cmpset(&(port->port_status),
2831                                                         RTE_PORT_HANDLING,
2832                                                         RTE_PORT_STOPPED) == 0)
2833                                         fprintf(stderr,
2834                                                 "Port %d cannot be set back to stopped\n",
2835                                                 pi);
2836                                 fprintf(stderr,
2837                                         "Failed to configure port %d Tx queues\n",
2838                                         pi);
2839                                 /* try to reconfigure queues next time */
2840                                 port->need_reconfig_queues = 1;
2841                                 return -1;
2842                         }
2843                         for (qi = 0; qi < nb_rxq; qi++) {
2844                                 /* setup rx queues */
2845                                 if ((numa_support) &&
2846                                         (rxring_numa[pi] != NUMA_NO_CONFIG)) {
2847                                         struct rte_mempool * mp =
2848                                                 mbuf_pool_find
2849                                                         (rxring_numa[pi], 0);
2850                                         if (mp == NULL) {
2851                                                 fprintf(stderr,
2852                                                         "Failed to set up Rx queue: no mempool allocated on socket %d\n",
2853                                                         rxring_numa[pi]);
2854                                                 return -1;
2855                                         }
2856
2857                                         diag = rx_queue_setup(pi, qi,
2858                                              port->nb_rx_desc[qi],
2859                                              rxring_numa[pi],
2860                                              &(port->rx_conf[qi]),
2861                                              mp);
2862                                 } else {
2863                                         struct rte_mempool *mp =
2864                                                 mbuf_pool_find
2865                                                         (port->socket_id, 0);
2866                                         if (mp == NULL) {
2867                                                 fprintf(stderr,
2868                                                         "Failed to set up Rx queue: no mempool allocated on socket %d\n",
2869                                                         port->socket_id);
2870                                                 return -1;
2871                                         }
2872                                         diag = rx_queue_setup(pi, qi,
2873                                              port->nb_rx_desc[qi],
2874                                              port->socket_id,
2875                                              &(port->rx_conf[qi]),
2876                                              mp);
2877                                 }
2878                                 if (diag == 0)
2879                                         continue;
2880
2881                                 /* Failed to set up the Rx queue, return */
2882                                 if (rte_atomic16_cmpset(&(port->port_status),
2883                                                         RTE_PORT_HANDLING,
2884                                                         RTE_PORT_STOPPED) == 0)
2885                                         fprintf(stderr,
2886                                                 "Port %d cannot be set back to stopped\n",
2887                                                 pi);
2888                                 fprintf(stderr,
2889                                         "Failed to configure port %d Rx queues\n",
2890                                         pi);
2891                                 /* try to reconfigure queues next time */
2892                                 port->need_reconfig_queues = 1;
2893                                 return -1;
2894                         }
2895                         /* setup hairpin queues */
2896                         if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2897                                 return -1;
2898                 }
2899                 configure_rxtx_dump_callbacks(verbose_level);
2900                 if (clear_ptypes) {
2901                         diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2902                                         NULL, 0);
2903                         if (diag < 0)
2904                                 fprintf(stderr,
2905                                         "Port %d: Failed to disable Ptype parsing\n",
2906                                         pi);
2907                 }
2908
2909                 p_pi = pi;
2910                 cnt_pi++;
2911
2912                 /* start port */
2913                 diag = eth_dev_start_mp(pi);
2914                 if (diag < 0) {
2915                         fprintf(stderr, "Fail to start port %d: %s\n",
2916                                 pi, rte_strerror(-diag));
2917
2918                         /* Failed to start the port; set status back to stopped */
2919                         if (rte_atomic16_cmpset(&(port->port_status),
2920                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2921                                 fprintf(stderr,
2922                                         "Port %d cannot be set back to stopped\n",
2923                                         pi);
2924                         continue;
2925                 }
2926
2927                 if (rte_atomic16_cmpset(&(port->port_status),
2928                         RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2929                         fprintf(stderr, "Port %d cannot be set into started\n",
2930                                 pi);
2931
2932                 if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2933                         printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2934                                         RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2935
2936                 /* at least one port started; link status needs checking */
2937                 need_check_link_status = 1;
2938
2939                 pl[cfg_pi++] = pi;
2940         }
2941
2942         if (need_check_link_status == 1 && !no_link_check)
2943                 check_all_ports_link_status(RTE_PORT_ALL);
2944         else if (need_check_link_status == 0)
2945                 fprintf(stderr, "Please stop the ports first\n");
2946
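        /*
         * In hairpin mode, queues are not bound automatically: explicitly
         * bind each started port's Tx hairpin queues to all of its peers'
         * Rx queues, and vice versa.
         */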
2947         if (hairpin_mode & 0xf) {
2948                 uint16_t i;
2949                 int j;
2950
2951                 /* bind all started hairpin ports */
2952                 for (i = 0; i < cfg_pi; i++) {
2953                         pi = pl[i];
2954                         /* bind current Tx to all peer Rx */
2955                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2956                                                         RTE_MAX_ETHPORTS, 1);
2957                         if (peer_pi < 0)
2958                                 return peer_pi;
2959                         for (j = 0; j < peer_pi; j++) {
2960                                 if (!port_is_started(peer_pl[j]))
2961                                         continue;
2962                                 diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2963                                 if (diag < 0) {
2964                                         fprintf(stderr,
2965                                                 "Error during binding hairpin Tx port %u to %u: %s\n",
2966                                                 pi, peer_pl[j],
2967                                                 rte_strerror(-diag));
2968                                         return -1;
2969                                 }
2970                         }
2971                         /* bind all peer Tx to current Rx */
2972                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2973                                                         RTE_MAX_ETHPORTS, 0);
2974                         if (peer_pi < 0)
2975                                 return peer_pi;
2976                         for (j = 0; j < peer_pi; j++) {
2977                                 if (!port_is_started(peer_pl[j]))
2978                                         continue;
2979                                 diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2980                                 if (diag < 0) {
2981                                         fprintf(stderr,
2982                                                 "Error during binding hairpin Tx port %u to %u: %s\n",
2983                                                 peer_pl[j], pi,
2984                                                 rte_strerror(-diag));
2985                                         return -1;
2986                                 }
2987                         }
2988                 }
2989         }
2990
2991         fill_xstats_display_info_for_port(pid);
2992
2993         printf("Done\n");
2994         return 0;
2995 }
2996
2997 void
2998 stop_port(portid_t pid)
2999 {
3000         portid_t pi;
3001         struct rte_port *port;
3002         int need_check_link_status = 0;
3003         portid_t peer_pl[RTE_MAX_ETHPORTS];
3004         int peer_pi;
3005
3006         if (port_id_is_invalid(pid, ENABLED_WARN))
3007                 return;
3008
3009         printf("Stopping ports...\n");
3010
3011         RTE_ETH_FOREACH_DEV(pi) {
3012                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3013                         continue;
3014
3015                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3016                         fprintf(stderr,
3017                                 "Please remove port %d from forwarding configuration.\n",
3018                                 pi);
3019                         continue;
3020                 }
3021
3022                 if (port_is_bonding_slave(pi)) {
3023                         fprintf(stderr,
3024                                 "Please remove port %d from bonded device.\n",
3025                                 pi);
3026                         continue;
3027                 }
3028
3029                 port = &ports[pi];
3030                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
3031                                                 RTE_PORT_HANDLING) == 0)
3032                         continue;
3033
3034                 if (hairpin_mode & 0xf) {
3035                         int j;
3036
3037                         rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3038                         /* unbind all peer Tx from current Rx */
3039                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3040                                                         RTE_MAX_ETHPORTS, 0);
3041                         if (peer_pi < 0)
3042                                 continue;
3043                         for (j = 0; j < peer_pi; j++) {
3044                                 if (!port_is_started(peer_pl[j]))
3045                                         continue;
3046                                 rte_eth_hairpin_unbind(peer_pl[j], pi);
3047                         }
3048                 }
3049
3050                 if (port->flow_list)
3051                         port_flow_flush(pi);
3052
3053                 if (eth_dev_stop_mp(pi) != 0)
3054                         RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3055                                 pi);
3056
3057                 if (rte_atomic16_cmpset(&(port->port_status),
3058                         RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
3059                         fprintf(stderr, "Port %d cannot be set into stopped\n",
3060                                 pi);
3061                 need_check_link_status = 1;
3062         }
3063         if (need_check_link_status && !no_link_check)
3064                 check_all_ports_link_status(RTE_PORT_ALL);
3065
3066         printf("Done\n");
3067 }
3068
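/*
 * Compact the given port array in place, keeping only entries whose
 * port id is still valid, and update the element count accordingly.
 */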
3069 static void
3070 remove_invalid_ports_in(portid_t *array, portid_t *total)
3071 {
3072         portid_t i;
3073         portid_t new_total = 0;
3074
3075         for (i = 0; i < *total; i++)
3076                 if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3077                         array[new_total] = array[i];
3078                         new_total++;
3079                 }
3080         *total = new_total;
3081 }
3082
3083 static void
3084 remove_invalid_ports(void)
3085 {
3086         remove_invalid_ports_in(ports_ids, &nb_ports);
3087         remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3088         nb_cfg_ports = nb_fwd_ports;
3089 }
3090
3091 void
3092 close_port(portid_t pid)
3093 {
3094         portid_t pi;
3095         struct rte_port *port;
3096
3097         if (port_id_is_invalid(pid, ENABLED_WARN))
3098                 return;
3099
3100         printf("Closing ports...\n");
3101
3102         RTE_ETH_FOREACH_DEV(pi) {
3103                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3104                         continue;
3105
3106                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3107                         fprintf(stderr,
3108                                 "Please remove port %d from forwarding configuration.\n",
3109                                 pi);
3110                         continue;
3111                 }
3112
3113                 if (port_is_bonding_slave(pi)) {
3114                         fprintf(stderr,
3115                                 "Please remove port %d from bonded device.\n",
3116                                 pi);
3117                         continue;
3118                 }
3119
3120                 port = &ports[pi];
3121                 if (rte_atomic16_cmpset(&(port->port_status),
3122                         RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
3123                         fprintf(stderr, "Port %d is already closed\n", pi);
3124                         continue;
3125                 }
3126
3127                 if (is_proc_primary()) {
3128                         port_flow_flush(pi);
3129                         port_flex_item_flush(pi);
3130                         rte_eth_dev_close(pi);
3131                 }
3132
3133                 free_xstats_display_info(pi);
3134         }
3135
3136         remove_invalid_ports();
3137         printf("Done\n");
3138 }
3139
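/*
 * Reset the given port (or all ports) via rte_eth_dev_reset() and mark
 * it for reconfiguration. All targeted ports must be stopped first.
 */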
3140 void
3141 reset_port(portid_t pid)
3142 {
3143         int diag;
3144         portid_t pi;
3145         struct rte_port *port;
3146
3147         if (port_id_is_invalid(pid, ENABLED_WARN))
3148                 return;
3149
3150         if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3151                 (pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3152                 fprintf(stderr,
3153                         "Cannot reset port(s); please stop port(s) first.\n");
3154                 return;
3155         }
3156
3157         printf("Resetting ports...\n");
3158
3159         RTE_ETH_FOREACH_DEV(pi) {
3160                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3161                         continue;
3162
3163                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3164                         fprintf(stderr,
3165                                 "Please remove port %d from forwarding configuration.\n",
3166                                 pi);
3167                         continue;
3168                 }
3169
3170                 if (port_is_bonding_slave(pi)) {
3171                         fprintf(stderr,
3172                                 "Please remove port %d from bonded device.\n",
3173                                 pi);
3174                         continue;
3175                 }
3176
3177                 diag = rte_eth_dev_reset(pi);
3178                 if (diag == 0) {
3179                         port = &ports[pi];
3180                         port->need_reconfig = 1;
3181                         port->need_reconfig_queues = 1;
3182                 } else {
3183                         fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3184                                 pi, diag);
3185                 }
3186         }
3187
3188         printf("Done\n");
3189 }
3190
3191 void
3192 attach_port(char *identifier)
3193 {
3194         portid_t pi;
3195         struct rte_dev_iterator iterator;
3196
3197         printf("Attaching a new port...\n");
3198
3199         if (identifier == NULL) {
3200                 fprintf(stderr, "No device identifier specified\n");
3201                 return;
3202         }
3203
3204         if (rte_dev_probe(identifier) < 0) {
3205                 TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3206                 return;
3207         }
3208
3209         /* first attach mode: event */
3210         if (setup_on_probe_event) {
3211                 /* new ports are detected on RTE_ETH_EVENT_NEW event */
3212                 for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3213                         if (ports[pi].port_status == RTE_PORT_HANDLING &&
3214                                         ports[pi].need_setup != 0)
3215                                 setup_attached_port(pi);
3216                 return;
3217         }
3218
3219         /* second attach mode: iterator */
3220         RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3221                 /* setup ports matching the devargs used for probing */
3222                 if (port_is_forwarding(pi))
3223                         continue; /* port was already attached before */
3224                 setup_attached_port(pi);
3225         }
3226 }
3227
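/*
 * Finish the setup of a newly attached port: reconfigure it on a valid
 * NUMA socket, enable promiscuous mode, register it in the port and
 * forwarding lists, and leave it in the stopped state.
 */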
3228 static void
3229 setup_attached_port(portid_t pi)
3230 {
3231         unsigned int socket_id;
3232         int ret;
3233
3234         socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3235         /* if socket_id is invalid, set to the first available socket. */
3236         if (check_socket_id(socket_id) < 0)
3237                 socket_id = socket_ids[0];
3238         reconfig(pi, socket_id);
3239         ret = rte_eth_promiscuous_enable(pi);
3240         if (ret != 0)
3241                 fprintf(stderr,
3242                         "Error during enabling promiscuous mode for port %u: %s - ignore\n",
3243                         pi, rte_strerror(-ret));
3244
3245         ports_ids[nb_ports++] = pi;
3246         fwd_ports_ids[nb_fwd_ports++] = pi;
3247         nb_cfg_ports = nb_fwd_ports;
3248         ports[pi].need_setup = 0;
3249         ports[pi].port_status = RTE_PORT_STOPPED;
3250
3251         printf("Port %d is attached. Total number of ports is now %d\n", pi, nb_ports);
3252         printf("Done\n");
3253 }
3254
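/*
 * Detach all ports that share the given device. Every sibling port must
 * already be stopped; their flow rules are flushed before removal.
 */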
3255 static void
3256 detach_device(struct rte_device *dev)
3257 {
3258         portid_t sibling;
3259
3260         if (dev == NULL) {
3261                 fprintf(stderr, "Device already removed\n");
3262                 return;
3263         }
3264
3265         printf("Removing a device...\n");
3266
3267         RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3268                 if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3269                         if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3270                                 fprintf(stderr, "Port %u not stopped\n",
3271                                         sibling);
3272                                 return;
3273                         }
3274                         port_flow_flush(sibling);
3275                 }
3276         }
3277
3278         if (rte_dev_remove(dev) < 0) {
3279                 TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3280                 return;
3281         }
3282         remove_invalid_ports();
3283
3284         printf("Device is detached\n");
3285         printf("Total number of ports is now %d\n", nb_ports);
3286         printf("Done\n");
3288 }
3289
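/* Detach the device that the given port belongs to. */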
3290 void
3291 detach_port_device(portid_t port_id)
3292 {
3293         int ret;
3294         struct rte_eth_dev_info dev_info;
3295
3296         if (port_id_is_invalid(port_id, ENABLED_WARN))
3297                 return;
3298
3299         if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3300                 if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3301                         fprintf(stderr, "Port not stopped\n");
3302                         return;
3303                 }
3304                 fprintf(stderr, "Port was not closed\n");
3305         }
3306
3307         ret = eth_dev_info_get_print_err(port_id, &dev_info);
3308         if (ret != 0) {
3309                 TESTPMD_LOG(ERR,
3310                         "Failed to get device info for port %d, not detaching\n",
3311                         port_id);
3312                 return;
3313         }
3314         detach_device(dev_info.device);
3315 }
3316
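/*
 * Detach the device identified by a devargs string, using EAL hotplug
 * removal after checking that all matching ports are stopped.
 */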
3317 void
3318 detach_devargs(char *identifier)
3319 {
3320         struct rte_dev_iterator iterator;
3321         struct rte_devargs da;
3322         portid_t port_id;
3323
3324         printf("Removing a device...\n");
3325
3326         memset(&da, 0, sizeof(da));
3327         if (rte_devargs_parsef(&da, "%s", identifier)) {
3328                 fprintf(stderr, "Cannot parse identifier\n");
3329                 return;
3330         }
3331
3332         RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3333                 if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3334                         if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3335                                 fprintf(stderr, "Port %u not stopped\n",
3336                                         port_id);
3337                                 rte_eth_iterator_cleanup(&iterator);
3338                                 rte_devargs_reset(&da);
3339                                 return;
3340                         }
3341                         port_flow_flush(port_id);
3342                 }
3343         }
3344
3345         if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3346                 TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3347                             da.name, da.bus->name);
3348                 rte_devargs_reset(&da);
3349                 return;
3350         }
3351
3352         remove_invalid_ports();
3353
3354         printf("Device %s is detached\n", identifier);
3355         printf("Total number of ports is now %d\n", nb_ports);
3356         printf("Done\n");
3357         rte_devargs_reset(&da);
3358 }
3359
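/*
 * Global teardown on exit: stop forwarding, stop and close all ports,
 * disable hotplug handling if it was enabled, and free the mempools.
 */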
3360 void
3361 pmd_test_exit(void)
3362 {
3363         portid_t pt_id;
3364         unsigned int i;
3365         int ret;
3366
3367         if (test_done == 0)
3368                 stop_packet_forwarding();
3369
3370 #ifndef RTE_EXEC_ENV_WINDOWS
3371         for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3372                 if (mempools[i]) {
3373                         if (mp_alloc_type == MP_ALLOC_ANON)
3374                                 rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3375                                                      NULL);
3376                 }
3377         }
3378 #endif
3379         if (ports != NULL) {
3380                 no_link_check = 1;
3381                 RTE_ETH_FOREACH_DEV(pt_id) {
3382                         printf("\nStopping port %d...\n", pt_id);
3383                         fflush(stdout);
3384                         stop_port(pt_id);
3385                 }
3386                 RTE_ETH_FOREACH_DEV(pt_id) {
3387                         printf("\nShutting down port %d...\n", pt_id);
3388                         fflush(stdout);
3389                         close_port(pt_id);
3390                 }
3391         }
3392
3393         if (hot_plug) {
3394                 ret = rte_dev_event_monitor_stop();
3395                 if (ret) {
3396                         RTE_LOG(ERR, EAL,
3397                                 "Failed to stop device event monitor.\n");
3398                         return;
3399                 }
3400
3401                 ret = rte_dev_event_callback_unregister(NULL,
3402                         dev_event_callback, NULL);
3403                 if (ret < 0) {
3404                         RTE_LOG(ERR, EAL,
3405                                 "Failed to unregister device event callback.\n");
3406                         return;
3407                 }
3408
3409                 ret = rte_dev_hotplug_handle_disable();
3410                 if (ret) {
3411                         RTE_LOG(ERR, EAL,
3412                                 "Failed to disable hotplug handling.\n");
3413                         return;
3414                 }
3415         }
3416         for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3417                 if (mempools[i])
3418                         mempool_free_mp(mempools[i]);
3419         }
3420         free(xstats_display);
3421
3422         printf("\nBye...\n");
3423 }
3424
3425 typedef void (*cmd_func_t)(void);
3426 struct pmd_test_command {
3427         const char *cmd_name;
3428         cmd_func_t cmd_func;
3429 };
3430
3431 /* Check the link status of all ports for up to 9 s, and finally print it */
3432 static void
3433 check_all_ports_link_status(uint32_t port_mask)
3434 {
3435 #define CHECK_INTERVAL 100 /* 100ms */
3436 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3437         portid_t portid;
3438         uint8_t count, all_ports_up, print_flag = 0;
3439         struct rte_eth_link link;
3440         int ret;
3441         char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3442
3443         printf("Checking link statuses...\n");
3444         fflush(stdout);
3445         for (count = 0; count <= MAX_CHECK_TIME; count++) {
3446                 all_ports_up = 1;
3447                 RTE_ETH_FOREACH_DEV(portid) {
3448                         if ((port_mask & (1 << portid)) == 0)
3449                                 continue;
3450                         memset(&link, 0, sizeof(link));
3451                         ret = rte_eth_link_get_nowait(portid, &link);
3452                         if (ret < 0) {
3453                                 all_ports_up = 0;
3454                                 if (print_flag == 1)
3455                                         fprintf(stderr,
3456                                                 "Port %u link get failed: %s\n",
3457                                                 portid, rte_strerror(-ret));
3458                                 continue;
3459                         }
3460                         /* print link status if flag set */
3461                         if (print_flag == 1) {
3462                                 rte_eth_link_to_str(link_status,
3463                                         sizeof(link_status), &link);
3464                                 printf("Port %d %s\n", portid, link_status);
3465                                 continue;
3466                         }
3467                         /* clear all_ports_up flag if any link down */
3468                         if (link.link_status == ETH_LINK_DOWN) {
3469                                 all_ports_up = 0;
3470                                 break;
3471                         }
3472                 }
3473                 /* after printing the final link status, get out */
3474                 if (print_flag == 1)
3475                         break;
3476
3477                 if (all_ports_up == 0) {
3478                         fflush(stdout);
3479                         rte_delay_ms(CHECK_INTERVAL);
3480                 }
3481
3482                 /* set the print_flag if all ports up or timeout */
3483                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3484                         print_flag = 1;
3485                 }
3486
3487                 if (lsc_interrupt)
3488                         break;
3489         }
3490 }
3491
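/*
 * Deferred handler for device removal: stop forwarding if the removed
 * port was in use, stop and close the port, then detach its device.
 */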
3492 static void
3493 rmv_port_callback(void *arg)
3494 {
3495         int need_to_start = 0;
3496         int org_no_link_check = no_link_check;
3497         portid_t port_id = (intptr_t)arg;
3498         struct rte_eth_dev_info dev_info;
3499         int ret;
3500
3501         RTE_ETH_VALID_PORTID_OR_RET(port_id);
3502
3503         if (!test_done && port_is_forwarding(port_id)) {
3504                 need_to_start = 1;
3505                 stop_packet_forwarding();
3506         }
3507         no_link_check = 1;
3508         stop_port(port_id);
3509         no_link_check = org_no_link_check;
3510
3511         ret = eth_dev_info_get_print_err(port_id, &dev_info);
3512         if (ret != 0)
3513                 TESTPMD_LOG(ERR,
3514                         "Failed to get device info for port %d, not detaching\n",
3515                         port_id);
3516         else {
3517                 struct rte_device *device = dev_info.device;
3518                 close_port(port_id);
3519                 detach_device(device); /* might be already removed or have more ports */
3520         }
3521         if (need_to_start)
3522                 start_packet_forwarding(0);
3523 }
3524
3525 /* This function is used by the interrupt thread */
3526 static int
3527 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3528                   void *ret_param)
3529 {
3530         RTE_SET_USED(param);
3531         RTE_SET_USED(ret_param);
3532
3533         if (type >= RTE_ETH_EVENT_MAX) {
3534                 fprintf(stderr,
3535                         "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3536                         port_id, __func__, type);
3537                 fflush(stderr);
3538         } else if (event_print_mask & (UINT32_C(1) << type)) {
3539                 printf("\nPort %" PRIu16 ": %s event\n", port_id,
3540                         eth_event_desc[type]);
3541                 fflush(stdout);
3542         }
3543
3544         switch (type) {
3545         case RTE_ETH_EVENT_NEW:
3546                 ports[port_id].need_setup = 1;
3547                 ports[port_id].port_status = RTE_PORT_HANDLING;
3548                 break;
3549         case RTE_ETH_EVENT_INTR_RMV:
3550                 if (port_id_is_invalid(port_id, DISABLED_WARN))
3551                         break;
3552                 if (rte_eal_alarm_set(100000,
3553                                 rmv_port_callback, (void *)(intptr_t)port_id))
3554                         fprintf(stderr,
3555                                 "Could not set up deferred device removal\n");
3556                 break;
3557         case RTE_ETH_EVENT_DESTROY:
3558                 ports[port_id].port_status = RTE_PORT_CLOSED;
3559                 printf("Port %u is closed\n", port_id);
3560                 break;
3561         default:
3562                 break;
3563         }
3564         return 0;
3565 }
3566
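/* Register eth_event_callback for every ethdev event type on all ports. */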
3567 static int
3568 register_eth_event_callback(void)
3569 {
3570         int ret;
3571         enum rte_eth_event_type event;
3572
3573         for (event = RTE_ETH_EVENT_UNKNOWN;
3574                         event < RTE_ETH_EVENT_MAX; event++) {
3575                 ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3576                                 event,
3577                                 eth_event_callback,
3578                                 NULL);
3579                 if (ret != 0) {
3580                         TESTPMD_LOG(ERR, "Failed to register callback for "
3581                                         "%s event\n", eth_event_desc[event]);
3582                         return -1;
3583                 }
3584         }
3585
3586         return 0;
3587 }
3588
3589 /* This function is used by the interrupt thread */
3590 static void
3591 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3592                              __rte_unused void *arg)
3593 {
3594         uint16_t port_id;
3595         int ret;
3596
3597         if (type >= RTE_DEV_EVENT_MAX) {
3598                 fprintf(stderr, "%s called upon invalid event %d\n",
3599                         __func__, type);
3600                 fflush(stderr);
3601         }
3602
3603         switch (type) {
3604         case RTE_DEV_EVENT_REMOVE:
3605                 RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3606                         device_name);
3607                 ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3608                 if (ret) {
3609                         RTE_LOG(ERR, EAL, "cannot get port by device %s!\n",
3610                                 device_name);
3611                         return;
3612                 }
3613                 /*
3614                  * Because the user's callback is invoked from the EAL interrupt
3615                  * callback, the interrupt callback must finish before it can be
3616                  * unregistered when the device is detached. So finish the
3617                  * callback quickly and use a deferred removal to detach the
3618                  * device. This is a workaround; once device detaching is moved
3619                  * into the EAL in the future, the deferred removal can be
3620                  * deleted.
3621                  */
3622                 if (rte_eal_alarm_set(100000,
3623                                 rmv_port_callback, (void *)(intptr_t)port_id))
3624                         RTE_LOG(ERR, EAL,
3625                                 "Could not set up deferred device removal\n");
3626                 break;
3627         case RTE_DEV_EVENT_ADD:
3628                 RTE_LOG(DEBUG, EAL, "The device: %s has been added!\n",
3629                         device_name);
3630                 /* TODO: After finish kernel driver binding,
3631                  * begin to attach port.
3632                  */
3633                 break;
3634         default:
3635                 break;
3636         }
3637 }
3638
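/*
 * Initialize each Rx/Tx queue configuration of a port from the driver
 * defaults, then re-apply the previously selected offloads and any
 * threshold parameters given on the command line.
 */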
3639 static void
3640 rxtx_port_config(portid_t pid)
3641 {
3642         uint16_t qid;
3643         uint64_t offloads;
3644         struct rte_port *port = &ports[pid];
3645
3646         for (qid = 0; qid < nb_rxq; qid++) {
3647                 offloads = port->rx_conf[qid].offloads;
3648                 port->rx_conf[qid] = port->dev_info.default_rxconf;
3649
3650                 if (rxq_share > 0 &&
3651                     (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
3652                         /* Non-zero share group to enable RxQ share. */
3653                         port->rx_conf[qid].share_group = pid / rxq_share + 1;
3654                         port->rx_conf[qid].share_qid = qid; /* Equal mapping. */
3655                 }
3656
3657                 if (offloads != 0)
3658                         port->rx_conf[qid].offloads = offloads;
3659
3660                 /* Check if any Rx parameters have been passed */
3661                 if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3662                         port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3663
3664                 if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3665                         port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3666
3667                 if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3668                         port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3669
3670                 if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3671                         port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3672
3673                 if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3674                         port->rx_conf[qid].rx_drop_en = rx_drop_en;
3675
3676                 port->nb_rx_desc[qid] = nb_rxd;
3677         }
3678
3679         for (qid = 0; qid < nb_txq; qid++) {
3680                 offloads = port->tx_conf[qid].offloads;
3681                 port->tx_conf[qid] = port->dev_info.default_txconf;
3682                 if (offloads != 0)
3683                         port->tx_conf[qid].offloads = offloads;
3684
3685                 /* Check if any Tx parameters have been passed */
3686                 if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3687                         port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3688
3689                 if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3690                         port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3691
3692                 if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3693                         port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3694
3695                 if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3696                         port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3697
3698                 if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3699                         port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3700
3701                 port->nb_tx_desc[qid] = nb_txd;
3702         }
3703 }
3704
3705 /*
3706  * Helper function to set MTU from frame size
3707  *
3708  * port->dev_info should be set before calling this function.
3709  *
3710  * return 0 on success, negative on error
3711  */
3712 int
3713 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3714 {
3715         struct rte_port *port = &ports[portid];
3716         uint32_t eth_overhead;
3717         uint16_t mtu, new_mtu;
3718
3719         eth_overhead = get_eth_overhead(&port->dev_info);
3720
3721         if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3722                 fprintf(stderr, "Failed to get MTU for port %u\n", portid);
3723                 return -1;
3724         }
3725
3726         new_mtu = max_rx_pktlen - eth_overhead;
3727
3728         if (mtu == new_mtu)
3729                 return 0;
3730
3731         if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3732                 fprintf(stderr,
3733                         "Failed to set MTU to %u for port %u\n",
3734                         new_mtu, portid);
3735                 return -1;
3736         }
3737
3738         port->dev_conf.rxmode.mtu = new_mtu;
3739
3740         return 0;
3741 }
3742
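/*
 * Apply the base configuration to every probed port: RSS when multiple
 * Rx queues are used, per-queue defaults, and LSC/RMV interrupt flags.
 */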
3743 void
3744 init_port_config(void)
3745 {
3746         portid_t pid;
3747         struct rte_port *port;
3748         int ret, i;
3749
3750         RTE_ETH_FOREACH_DEV(pid) {
3751                 port = &ports[pid];
3752                 port->dev_conf.fdir_conf = fdir_conf;
3753
3754                 ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3755                 if (ret != 0)
3756                         return;
3757
3758                 if (nb_rxq > 1) {
3759                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3760                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3761                                 rss_hf & port->dev_info.flow_type_rss_offloads;
3762                 } else {
3763                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3764                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3765                 }
3766
3767                 if (port->dcb_flag == 0) {
3768                         if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3769                                 port->dev_conf.rxmode.mq_mode =
3770                                         (enum rte_eth_rx_mq_mode)
3771                                                 (rx_mq_mode & ETH_MQ_RX_RSS);
3772                         } else {
3773                                 port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3774                                 port->dev_conf.rxmode.offloads &=
3775                                                 ~DEV_RX_OFFLOAD_RSS_HASH;
3776
3777                                 for (i = 0;
3778                                      i < port->dev_info.nb_rx_queues;
3779                                      i++)
3780                                         port->rx_conf[i].offloads &=
3781                                                 ~DEV_RX_OFFLOAD_RSS_HASH;
3782                         }
3783                 }
3784
3785                 rxtx_port_config(pid);
3786
3787                 ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3788                 if (ret != 0)
3789                         return;
3790
3791 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3792                 rte_pmd_ixgbe_bypass_init(pid);
3793 #endif
3794
3795                 if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3796                         port->dev_conf.intr_conf.lsc = 1;
3797                 if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3798                         port->dev_conf.intr_conf.rmv = 1;
3799         }
3800 }
3801
3802 void set_port_slave_flag(portid_t slave_pid)
3803 {
3804         struct rte_port *port;
3805
3806         port = &ports[slave_pid];
3807         port->slave_flag = 1;
3808 }
3809
3810 void clear_port_slave_flag(portid_t slave_pid)
3811 {
3812         struct rte_port *port;
3813
3814         port = &ports[slave_pid];
3815         port->slave_flag = 0;
3816 }
3817
3818 uint8_t port_is_bonding_slave(portid_t slave_pid)
3819 {
3820         struct rte_port *port;
3821         struct rte_eth_dev_info dev_info;
3822         int ret;
3823
3824         port = &ports[slave_pid];
3825         ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3826         if (ret != 0) {
3827                 TESTPMD_LOG(ERR,
3828                         "Failed to get device info for port id %d, "
3829                         "cannot determine if the port is a bonded slave\n",
3830                         slave_pid);
3831                 return 0;
3832         }
3833         if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3834                 return 1;
3835         return 0;
3836 }
3837
3838 const uint16_t vlan_tags[] = {
3839                 0,  1,  2,  3,  4,  5,  6,  7,
3840                 8,  9, 10, 11,  12, 13, 14, 15,
3841                 16, 17, 18, 19, 20, 21, 22, 23,
3842                 24, 25, 26, 27, 28, 29, 30, 31
3843 };
3844
3845 static int
3846 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3847                  enum dcb_mode_enable dcb_mode,
3848                  enum rte_eth_nb_tcs num_tcs,
3849                  uint8_t pfc_en)
3850 {
3851         uint8_t i;
3852         int32_t rc;
3853         struct rte_eth_rss_conf rss_conf;
3854
3855         /*
3856          * Builds up the correct configuration for DCB+VT based on the VLAN tags array
3857          * given above, and the number of traffic classes available for use.
3858          */
3859         if (dcb_mode == DCB_VT_ENABLED) {
3860                 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3861                                 &eth_conf->rx_adv_conf.vmdq_dcb_conf;
3862                 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3863                                 &eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3864
3865                 /* VMDQ+DCB RX and TX configurations */
3866                 vmdq_rx_conf->enable_default_pool = 0;
3867                 vmdq_rx_conf->default_pool = 0;
3868                 vmdq_rx_conf->nb_queue_pools =
3869                         (num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3870                 vmdq_tx_conf->nb_queue_pools =
3871                         (num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3872
3873                 vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3874                 for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3875                         vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3876                         vmdq_rx_conf->pool_map[i].pools =
3877                                 1 << (i % vmdq_rx_conf->nb_queue_pools);
3878                 }
3879                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3880                         vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3881                         vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3882                 }
3883
3884                 /* set DCB mode of RX and TX of multiple queues */
3885                 eth_conf->rxmode.mq_mode =
3886                                 (enum rte_eth_rx_mq_mode)
3887                                         (rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3888                 eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3889         } else {
3890                 struct rte_eth_dcb_rx_conf *rx_conf =
3891                                 &eth_conf->rx_adv_conf.dcb_rx_conf;
3892                 struct rte_eth_dcb_tx_conf *tx_conf =
3893                                 &eth_conf->tx_adv_conf.dcb_tx_conf;
3894
3895                 memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3896
3897                 rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3898                 if (rc != 0)
3899                         return rc;
3900
3901                 rx_conf->nb_tcs = num_tcs;
3902                 tx_conf->nb_tcs = num_tcs;
3903
3904                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3905                         rx_conf->dcb_tc[i] = i % num_tcs;
3906                         tx_conf->dcb_tc[i] = i % num_tcs;
3907                 }
3908
3909                 eth_conf->rxmode.mq_mode =
3910                                 (enum rte_eth_rx_mq_mode)
3911                                         (rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3912                 eth_conf->rx_adv_conf.rss_conf = rss_conf;
3913                 eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3914         }
3915
3916         if (pfc_en)
3917                 eth_conf->dcb_capability_en =
3918                                 ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3919         else
3920                 eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3921
3922         return 0;
3923 }
3924
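/*
 * Reconfigure a port for DCB: build the DCB configuration, reconfigure
 * the device and its queue counts, and enable the VLAN filter offload
 * for the test VLAN tags.
 */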
3925 int
3926 init_port_dcb_config(portid_t pid,
3927                      enum dcb_mode_enable dcb_mode,
3928                      enum rte_eth_nb_tcs num_tcs,
3929                      uint8_t pfc_en)
3930 {
3931         struct rte_eth_conf port_conf;
3932         struct rte_port *rte_port;
3933         int retval;
3934         uint16_t i;
3935
3936         if (num_procs > 1) {
3937                 fprintf(stderr, "The multi-process feature doesn't support DCB.\n");
3938                 return -ENOTSUP;
3939         }
3940         rte_port = &ports[pid];
3941
3942         /* retain the original device configuration. */
3943         memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3944
3945         /* set configuration of DCB in VT mode and DCB in non-VT mode */
3946         retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3947         if (retval < 0)
3948                 return retval;
3949         port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3950
3951         /* re-configure the device. */
3952         retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3953         if (retval < 0)
3954                 return retval;
3955
3956         retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3957         if (retval != 0)
3958                 return retval;
3959
3960         /* If dev_info.vmdq_pool_base is greater than 0,
3961          * the queue IDs of the VMDq pools start after the PF queues.
3962          */
3963         if (dcb_mode == DCB_VT_ENABLED &&
3964             rte_port->dev_info.vmdq_pool_base > 0) {
3965                 fprintf(stderr,
3966                         "VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3967                         pid);
3968                 return -1;
3969         }
3970
3971         /* Assume the ports in testpmd have the same DCB capability
3972          * and the same number of Rx and Tx queues in DCB mode.
3973          */
3974         if (dcb_mode == DCB_VT_ENABLED) {
3975                 if (rte_port->dev_info.max_vfs > 0) {
3976                         nb_rxq = rte_port->dev_info.nb_rx_queues;
3977                         nb_txq = rte_port->dev_info.nb_tx_queues;
3978                 } else {
3979                         nb_rxq = rte_port->dev_info.max_rx_queues;
3980                         nb_txq = rte_port->dev_info.max_tx_queues;
3981                 }
3982         } else {
3983                 /* if VT is disabled, use all PF queues */
3984                 if (rte_port->dev_info.vmdq_pool_base == 0) {
3985                         nb_rxq = rte_port->dev_info.max_rx_queues;
3986                         nb_txq = rte_port->dev_info.max_tx_queues;
3987                 } else {
3988                         nb_rxq = (queueid_t)num_tcs;
3989                         nb_txq = (queueid_t)num_tcs;
3991                 }
3992         }
3993         rx_free_thresh = 64;
3994
3995         memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3996
3997         rxtx_port_config(pid);
3998         /* VLAN filter */
3999         rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
4000         for (i = 0; i < RTE_DIM(vlan_tags); i++)
4001                 rx_vft_set(pid, vlan_tags[i], 1);
4002
4003         retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4004         if (retval != 0)
4005                 return retval;
4006
4007         rte_port->dcb_flag = 1;
4008
4009         /* Enter DCB configuration status */
4010         dcb_config = 1;
4011
4012         return 0;
4013 }
4014
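/*
 * Allocate the global array of port structures and initialize the
 * per-port NUMA configuration tables to "no configuration".
 */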
4015 static void
4016 init_port(void)
4017 {
4018         int i;
4019
4020         /* Configuration of Ethernet ports. */
4021         ports = rte_zmalloc("testpmd: ports",
4022                             sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4023                             RTE_CACHE_LINE_SIZE);
4024         if (ports == NULL) {
4025                 rte_exit(EXIT_FAILURE,
4026                                 "rte_zmalloc(%d struct rte_port) failed\n",
4027                                 RTE_MAX_ETHPORTS);
4028         }
4029         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4030                 ports[i].xstats_info.allocated = false;
4031         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4032                 LIST_INIT(&ports[i].flow_tunnel_list);
4033         /* Initialize ports NUMA structures */
4034         memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4035         memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4036         memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4037 }
4038
4039 static void
4040 force_quit(void)
4041 {
4042         pmd_test_exit();
4043         prompt_exit();
4044 }
4045
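/*
 * Periodic statistics display. The clr and top_left byte arrays are the
 * ANSI escape sequences ESC[2J (clear screen) and ESC[1;1H (move the
 * cursor to the top-left corner).
 */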
4046 static void
4047 print_stats(void)
4048 {
4049         uint8_t i;
4050         const char clr[] = { 27, '[', '2', 'J', '\0' };
4051         const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4052
4053         /* Clear screen and move to top left */
4054         printf("%s%s", clr, top_left);
4055
4056         printf("\nPort statistics ====================================");
4057         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4058                 nic_stats_display(fwd_ports_ids[i]);
4059
4060         fflush(stdout);
4061 }
4062
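/*
 * SIGINT/SIGTERM handler: uninitialize the capture and latency stats
 * frameworks, tear everything down, then re-raise the signal with the
 * default handler so the process exits with the expected status.
 */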
4063 static void
4064 signal_handler(int signum)
4065 {
4066         if (signum == SIGINT || signum == SIGTERM) {
4067                 fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4068                         signum);
4069 #ifdef RTE_LIB_PDUMP
4070                 /* uninitialize packet capture framework */
4071                 rte_pdump_uninit();
4072 #endif
4073 #ifdef RTE_LIB_LATENCYSTATS
4074                 if (latencystats_enabled != 0)
4075                         rte_latencystats_uninit();
4076 #endif
4077                 force_quit();
4078                 /* Set flag to indicate forced termination. */
4079                 f_quit = 1;
4080                 /* exit with the expected status */
4081 #ifndef RTE_EXEC_ENV_WINDOWS
4082                 signal(signum, SIG_DFL);
4083                 kill(getpid(), signum);
4084 #endif
4085         }
4086 }
4087
4088 int
4089 main(int argc, char** argv)
4090 {
4091         int diag;
4092         portid_t port_id;
4093         uint16_t count;
4094         int ret;
4095
4096         signal(SIGINT, signal_handler);
4097         signal(SIGTERM, signal_handler);
4098
4099         testpmd_logtype = rte_log_register("testpmd");
4100         if (testpmd_logtype < 0)
4101                 rte_exit(EXIT_FAILURE, "Cannot register log type");
4102         rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4103
4104         diag = rte_eal_init(argc, argv);
4105         if (diag < 0)
4106                 rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4107                          rte_strerror(rte_errno));
4108
4109         ret = register_eth_event_callback();
4110         if (ret != 0)
4111                 rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
4112
4113 #ifdef RTE_LIB_PDUMP
4114         /* initialize packet capture framework */
4115         rte_pdump_init();
4116 #endif
4117
4118         count = 0;
4119         RTE_ETH_FOREACH_DEV(port_id) {
4120                 ports_ids[count] = port_id;
4121                 count++;
4122         }
4123         nb_ports = (portid_t) count;
4124         if (nb_ports == 0)
4125                 TESTPMD_LOG(WARNING, "No probed Ethernet devices\n");
4126
4127         /* allocate port structures, and init them */
4128         init_port();
4129
4130         set_def_fwd_config();
4131         if (nb_lcores == 0)
4132                 rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4133                          "Check the core mask argument\n");
4134
4135         /* Bitrate/latency stats disabled by default */
4136 #ifdef RTE_LIB_BITRATESTATS
4137         bitrate_enabled = 0;
4138 #endif
4139 #ifdef RTE_LIB_LATENCYSTATS
4140         latencystats_enabled = 0;
4141 #endif
4142
4143         /* on FreeBSD, mlockall() is disabled by default */
4144 #ifdef RTE_EXEC_ENV_FREEBSD
4145         do_mlockall = 0;
4146 #else
4147         do_mlockall = 1;
4148 #endif
4149
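        /*
         * rte_eal_init() returned the number of arguments it consumed;
         * skip past them to reach the application's own arguments.
         */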
4150         argc -= diag;
4151         argv += diag;
4152         if (argc > 1)
4153                 launch_args_parse(argc, argv);
4154
4155 #ifndef RTE_EXEC_ENV_WINDOWS
4156         if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4157                 TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4158                         strerror(errno));
4159         }
4160 #endif
4161
4162         if (tx_first && interactive)
4163                 rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4164                                 "interactive mode.\n");
4165
4166         if (tx_first && lsc_interrupt) {
4167                 fprintf(stderr,
4168                         "Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4169                 lsc_interrupt = 0;
4170         }
4171
4172         if (!nb_rxq && !nb_txq)
4173                 fprintf(stderr,
4174                         "Warning: Either Rx or Tx queue count should be non-zero\n");
4175
4176         if (nb_rxq > 1 && nb_rxq > nb_txq)
4177                 fprintf(stderr,
4178                         "Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4179                         nb_rxq, nb_txq);
4180
4181         init_config();
4182
4183         if (hot_plug) {
4184                 ret = rte_dev_hotplug_handle_enable();
4185                 if (ret) {
4186                         RTE_LOG(ERR, EAL,
4187                                 "Failed to enable hotplug handling.\n");
4188                         return -1;
4189                 }
4190
4191                 ret = rte_dev_event_monitor_start();
4192                 if (ret) {
4193                         RTE_LOG(ERR, EAL,
4194                                 "Failed to start device event monitoring.\n");
4195                         return -1;
4196                 }
4197
4198                 ret = rte_dev_event_callback_register(NULL,
4199                         dev_event_callback, NULL);
4200                 if (ret) {
4201                         RTE_LOG(ERR, EAL,
4202                                 "Failed to register device event callback\n");
4203                         return -1;
4204                 }
4205         }
4206
4207         if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4208                 rte_exit(EXIT_FAILURE, "Start ports failed\n");
4209
4210         /* set all ports to promiscuous mode by default */
4211         RTE_ETH_FOREACH_DEV(port_id) {
4212                 ret = rte_eth_promiscuous_enable(port_id);
4213                 if (ret != 0)
4214                         fprintf(stderr,
4215                                 "Error during enabling promiscuous mode for port %u: %s - ignore\n",
4216                                 port_id, rte_strerror(-ret));
4217         }
4218
4219         /* Init metrics library */
4220         rte_metrics_init(rte_socket_id());
4221
4222 #ifdef RTE_LIB_LATENCYSTATS
4223         if (latencystats_enabled != 0) {
4224                 int ret = rte_latencystats_init(1, NULL);
4225                 if (ret)
4226                         fprintf(stderr,
4227                                 "Warning: latencystats init() returned error %d\n",
4228                                 ret);
4229                 fprintf(stderr, "Latencystats running on lcore %d\n",
4230                         latencystats_lcore_id);
4231         }
4232 #endif
4233
4234         /* Setup bitrate stats */
4235 #ifdef RTE_LIB_BITRATESTATS
4236         if (bitrate_enabled != 0) {
4237                 bitrate_data = rte_stats_bitrate_create();
4238                 if (bitrate_data == NULL)
4239                         rte_exit(EXIT_FAILURE,
4240                                 "Could not allocate bitrate data.\n");
4241                 rte_stats_bitrate_reg(bitrate_data);
4242         }
4243 #endif
4244 #ifdef RTE_LIB_CMDLINE
4245         if (strlen(cmdline_filename) != 0)
4246                 cmdline_read_from_file(cmdline_filename);
4247
4248         if (interactive == 1) {
4249                 if (auto_start) {
4250                         printf("Start automatic packet forwarding\n");
4251                         start_packet_forwarding(0);
4252                 }
4253                 prompt();
4254                 pmd_test_exit();
4255         } else
4256 #endif
4257         {
4258                 char c;
4259                 int rc;
4260
4261                 f_quit = 0;
4262
4263                 printf("No interactive command line; starting packet forwarding\n");
4264                 start_packet_forwarding(tx_first);
4265                 if (stats_period != 0) {
4266                         uint64_t prev_time = 0, cur_time, diff_time = 0;
4267                         uint64_t timer_period;
4268
4269                         /* Convert to number of cycles */
4270                         timer_period = stats_period * rte_get_timer_hz();
4271
4272                         while (f_quit == 0) {
4273                                 cur_time = rte_get_timer_cycles();
4274                                 diff_time += cur_time - prev_time;
4275
4276                                 if (diff_time >= timer_period) {
4277                                         print_stats();
4278                                         /* Reset the timer */
4279                                         diff_time = 0;
4280                                 }
4281                                 /* Sleep to avoid unnecessary checks */
4282                                 prev_time = cur_time;
4283                                 rte_delay_us_sleep(US_PER_S);
4284                         }
4285                 }
4286
4287                 printf("Press enter to exit\n");
4288                 rc = read(0, &c, 1);
4289                 pmd_test_exit();
4290                 if (rc < 0)
4291                         return 1;
4292         }
4293
4294         ret = rte_eal_cleanup();
4295         if (ret != 0)
4296                 rte_exit(EXIT_FAILURE,
4297                          "EAL cleanup failed: %s\n", strerror(-ret));
4298
4299         return EXIT_SUCCESS;
4300 }