net/mlx5: support three level table walk
[dpdk.git] / app / test-pmd / testpmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 #include <errno.h>
15 #include <stdbool.h>
16
17 #include <sys/queue.h>
18 #include <sys/stat.h>
19
20 #include <stdint.h>
21 #include <unistd.h>
22 #include <inttypes.h>
23
24 #include <rte_common.h>
25 #include <rte_errno.h>
26 #include <rte_byteorder.h>
27 #include <rte_log.h>
28 #include <rte_debug.h>
29 #include <rte_cycles.h>
30 #include <rte_memory.h>
31 #include <rte_memcpy.h>
32 #include <rte_launch.h>
33 #include <rte_eal.h>
34 #include <rte_alarm.h>
35 #include <rte_per_lcore.h>
36 #include <rte_lcore.h>
37 #include <rte_atomic.h>
38 #include <rte_branch_prediction.h>
39 #include <rte_mempool.h>
40 #include <rte_malloc.h>
41 #include <rte_mbuf.h>
42 #include <rte_mbuf_pool_ops.h>
43 #include <rte_interrupts.h>
44 #include <rte_pci.h>
45 #include <rte_ether.h>
46 #include <rte_ethdev.h>
47 #include <rte_dev.h>
48 #include <rte_string_fns.h>
49 #ifdef RTE_NET_IXGBE
50 #include <rte_pmd_ixgbe.h>
51 #endif
52 #ifdef RTE_LIB_PDUMP
53 #include <rte_pdump.h>
54 #endif
55 #include <rte_flow.h>
56 #include <rte_metrics.h>
57 #ifdef RTE_LIB_BITRATESTATS
58 #include <rte_bitrate.h>
59 #endif
60 #ifdef RTE_LIB_LATENCYSTATS
61 #include <rte_latencystats.h>
62 #endif
63
64 #include "testpmd.h"
65
66 #ifndef MAP_HUGETLB
67 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
68 #define HUGE_FLAG (0x40000)
69 #else
70 #define HUGE_FLAG MAP_HUGETLB
71 #endif
72
73 #ifndef MAP_HUGE_SHIFT
74 /* older kernels (or FreeBSD) will not have this define */
75 #define HUGE_SHIFT (26)
76 #else
77 #define HUGE_SHIFT MAP_HUGE_SHIFT
78 #endif
79
80 #define EXTMEM_HEAP_NAME "extmem"
81 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
82
83 uint16_t verbose_level = 0; /**< Silent by default. */
84 int testpmd_logtype; /**< Log type for testpmd logs */
85
86 /* use main core for command line ? */
87 uint8_t interactive = 0;
88 uint8_t auto_start = 0;
89 uint8_t tx_first;
90 char cmdline_filename[PATH_MAX] = {0};
91
92 /*
93  * NUMA support configuration.
94  * When set, the NUMA support attempts to dispatch the allocation of the
95  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
96  * probed ports among the CPU sockets 0 and 1.
97  * Otherwise, all memory is allocated from CPU socket 0.
98  */
99 uint8_t numa_support = 1; /**< numa enabled by default */
100
101 /*
102  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
103  * not configured.
104  */
105 uint8_t socket_num = UMA_NO_CONFIG;
106
107 /*
108  * Select mempool allocation type:
109  * - native: use regular DPDK memory
110  * - anon: use regular DPDK memory to create mempool, but populate using
111  *         anonymous memory (may not be IOVA-contiguous)
112  * - xmem: use externally allocated hugepage memory
113  */
114 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
115
116 /*
117  * Store specified sockets on which memory pool to be used by ports
118  * is allocated.
119  */
120 uint8_t port_numa[RTE_MAX_ETHPORTS];
121
122 /*
123  * Store specified sockets on which RX ring to be used by ports
124  * is allocated.
125  */
126 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
127
128 /*
129  * Store specified sockets on which TX ring to be used by ports
130  * is allocated.
131  */
132 uint8_t txring_numa[RTE_MAX_ETHPORTS];
133
134 /*
135  * Record the Ethernet address of peer target ports to which packets are
136  * forwarded.
137  * Must be instantiated with the ethernet addresses of peer traffic generator
138  * ports.
139  */
140 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
141 portid_t nb_peer_eth_addrs = 0;
142
143 /*
144  * Probed Target Environment.
145  */
146 struct rte_port *ports;        /**< For all probed ethernet ports. */
147 portid_t nb_ports;             /**< Number of probed ethernet ports. */
148 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
149 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
150
151 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
152
153 /*
154  * Test Forwarding Configuration.
155  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
156  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
157  */
158 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
159 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
160 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
161 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
162
163 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
164 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
165
166 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
167 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
168
169 /*
170  * Forwarding engines.
171  */
172 struct fwd_engine * fwd_engines[] = {
173         &io_fwd_engine,
174         &mac_fwd_engine,
175         &mac_swap_engine,
176         &flow_gen_engine,
177         &rx_only_engine,
178         &tx_only_engine,
179         &csum_fwd_engine,
180         &icmp_echo_engine,
181         &noisy_vnf_engine,
182         &five_tuple_swap_fwd_engine,
183 #ifdef RTE_LIBRTE_IEEE1588
184         &ieee1588_fwd_engine,
185 #endif
186         NULL,
187 };
188
189 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
190 uint16_t mempool_flags;
191
192 struct fwd_config cur_fwd_config;
193 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
194 uint32_t retry_enabled;
195 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
196 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
197
198 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
199 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
200         DEFAULT_MBUF_DATA_SIZE
201 }; /**< Mbuf data space size. */
202 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
203                                       * specified on command-line. */
204 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
205
206 /*
207  * In a container, a process running with the 'stats-period' option cannot be
208  * terminated. Set a flag to exit the stats period loop on SIGINT/SIGTERM.
209  */
210 uint8_t f_quit;
211
212 /*
213  * Configuration of packet segments used to scatter received packets
214  * if some of split features is configured.
215  */
216 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
217 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
218 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
219 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
220
221 /*
222  * Configuration of packet segments used by the "txonly" processing engine.
223  */
224 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
225 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
226         TXONLY_DEF_PACKET_LEN,
227 };
228 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
229
230 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
231 /**< Split policy for packets to TX. */
232
233 uint8_t txonly_multi_flow;
234 /**< Whether multiple flows are generated in TXONLY mode. */
235
236 uint32_t tx_pkt_times_inter;
237 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
238
239 uint32_t tx_pkt_times_intra;
240 /**< Timings for send scheduling in TXONLY mode, time between packets. */
241
242 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
243 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
244 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
245
246 /* current configuration is in DCB or not, 0 means it is not in DCB mode */
247 uint8_t dcb_config = 0;
248
249 /* Whether the dcb is in testing status */
250 uint8_t dcb_test = 0;
251
252 /*
253  * Configurable number of RX/TX queues.
254  */
255 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
256 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
257 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
258
259 /*
260  * Configurable number of RX/TX ring descriptors.
261  * Defaults are supplied by drivers via ethdev.
262  */
263 #define RTE_TEST_RX_DESC_DEFAULT 0
264 #define RTE_TEST_TX_DESC_DEFAULT 0
265 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
266 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
267
268 #define RTE_PMD_PARAM_UNSET -1
269 /*
270  * Configurable values of RX and TX ring threshold registers.
271  */
272
273 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
274 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
275 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
276
277 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
278 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
279 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
280
281 /*
282  * Configurable value of RX free threshold.
283  */
284 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
285
286 /*
287  * Configurable value of RX drop enable.
288  */
289 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
290
291 /*
292  * Configurable value of TX free threshold.
293  */
294 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
295
296 /*
297  * Configurable value of TX RS bit threshold.
298  */
299 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
300
301 /*
302  * Configurable value of buffered packets before sending.
303  */
304 uint16_t noisy_tx_sw_bufsz;
305
306 /*
307  * Configurable value of packet buffer timeout.
308  */
309 uint16_t noisy_tx_sw_buf_flush_time;
310
311 /*
312  * Configurable value for size of VNF internal memory area
313  * used for simulating noisy neighbour behaviour
314  */
315 uint64_t noisy_lkup_mem_sz;
316
317 /*
318  * Configurable value of number of random writes done in
319  * VNF simulation memory area.
320  */
321 uint64_t noisy_lkup_num_writes;
322
323 /*
324  * Configurable value of number of random reads done in
325  * VNF simulation memory area.
326  */
327 uint64_t noisy_lkup_num_reads;
328
329 /*
330  * Configurable value of number of random reads/writes done in
331  * VNF simulation memory area.
332  */
333 uint64_t noisy_lkup_num_reads_writes;
334
335 /*
336  * Receive Side Scaling (RSS) configuration.
337  */
338 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
339
340 /*
341  * Port topology configuration
342  */
343 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
344
345 /*
346  * Avoids to flush all the RX streams before starts forwarding.
347  */
348 uint8_t no_flush_rx = 0; /* flush by default */
349
350 /*
351  * Flow API isolated mode.
352  */
353 uint8_t flow_isolate_all;
354
355 /*
356  * Avoids to check link status when starting/stopping a port.
357  */
358 uint8_t no_link_check = 0; /* check by default */
359
360 /*
361  * Don't automatically start all ports in interactive mode.
362  */
363 uint8_t no_device_start = 0;
364
365 /*
366  * Enable link status change notification
367  */
368 uint8_t lsc_interrupt = 1; /* enabled by default */
369
370 /*
371  * Enable device removal notification.
372  */
373 uint8_t rmv_interrupt = 1; /* enabled by default */
374
375 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
376
377 /* After attach, port setup is called on event or by iterator */
378 bool setup_on_probe_event = true;
379
380 /* Clear ptypes on port initialization. */
381 uint8_t clear_ptypes = true;
382
383 /* Hairpin ports configuration mode. */
384 uint16_t hairpin_mode;
385
386 /* Pretty printing of ethdev events */
387 static const char * const eth_event_desc[] = {
388         [RTE_ETH_EVENT_UNKNOWN] = "unknown",
389         [RTE_ETH_EVENT_INTR_LSC] = "link state change",
390         [RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
391         [RTE_ETH_EVENT_INTR_RESET] = "reset",
392         [RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
393         [RTE_ETH_EVENT_IPSEC] = "IPsec",
394         [RTE_ETH_EVENT_MACSEC] = "MACsec",
395         [RTE_ETH_EVENT_INTR_RMV] = "device removal",
396         [RTE_ETH_EVENT_NEW] = "device probed",
397         [RTE_ETH_EVENT_DESTROY] = "device released",
398         [RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
399         [RTE_ETH_EVENT_MAX] = NULL,
400 };
401
402 /*
403  * Display or mask ether events
404  * Default to all events except VF_MBOX
405  */
406 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
407                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
408                             (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
409                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
410                             (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
411                             (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
412                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
413                             (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
414 /*
415  * Decide if all memory are locked for performance.
416  */
417 int do_mlockall = 0;
418
419 /*
420  * NIC bypass mode configuration options.
421  */
422
423 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
424 /* The NIC bypass watchdog timeout. */
425 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
426 #endif
427
428
429 #ifdef RTE_LIB_LATENCYSTATS
430
431 /*
432  * Set when latency stats is enabled in the commandline
433  */
434 uint8_t latencystats_enabled;
435
436 /*
437  * Lcore ID to serive latency statistics.
438  */
439 lcoreid_t latencystats_lcore_id = -1;
440
441 #endif
442
443 /*
444  * Ethernet device configuration.
445  */
446 struct rte_eth_rxmode rx_mode = {
447         /* Default maximum frame length.
448          * Zero is converted to "RTE_ETHER_MTU + PMD Ethernet overhead"
449          * in init_config().
450          */
451         .max_rx_pkt_len = 0,
452 };
453
454 struct rte_eth_txmode tx_mode = {
455         .offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
456 };
457
458 struct rte_fdir_conf fdir_conf = {
459         .mode = RTE_FDIR_MODE_NONE,
460         .pballoc = RTE_FDIR_PBALLOC_64K,
461         .status = RTE_FDIR_REPORT_STATUS,
462         .mask = {
463                 .vlan_tci_mask = 0xFFEF,
464                 .ipv4_mask     = {
465                         .src_ip = 0xFFFFFFFF,
466                         .dst_ip = 0xFFFFFFFF,
467                 },
468                 .ipv6_mask     = {
469                         .src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
470                         .dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
471                 },
472                 .src_port_mask = 0xFFFF,
473                 .dst_port_mask = 0xFFFF,
474                 .mac_addr_byte_mask = 0xFF,
475                 .tunnel_type_mask = 1,
476                 .tunnel_id_mask = 0xFFFFFFFF,
477         },
478         .drop_queue = 127,
479 };
480
481 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
482
483 /*
484  * Display zero values by default for xstats
485  */
486 uint8_t xstats_hide_zero;
487
488 /*
489  * Measure of CPU cycles disabled by default
490  */
491 uint8_t record_core_cycles;
492
493 /*
494  * Display of RX and TX bursts disabled by default
495  */
496 uint8_t record_burst_stats;
497
498 unsigned int num_sockets = 0;
499 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
500
501 #ifdef RTE_LIB_BITRATESTATS
502 /* Bitrate statistics */
503 struct rte_stats_bitrates *bitrate_data;
504 lcoreid_t bitrate_lcore_id;
505 uint8_t bitrate_enabled;
506 #endif
507
508 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
509 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
510
511 /*
512  * hexadecimal bitmask of RX mq mode can be enabled.
513  */
514 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
515
516 /*
517  * Used to set forced link speed
518  */
519 uint32_t eth_link_speed;
520
521 /* Forward function declarations */
522 static void setup_attached_port(portid_t pi);
523 static void check_all_ports_link_status(uint32_t port_mask);
524 static int eth_event_callback(portid_t port_id,
525                               enum rte_eth_event_type type,
526                               void *param, void *ret_param);
527 static void dev_event_callback(const char *device_name,
528                                 enum rte_dev_event_type type,
529                                 void *param);
530
531 /*
532  * Check if all the ports are started.
533  * If yes, return positive value. If not, return zero.
534  */
535 static int all_ports_started(void);
536
537 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
538 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
539
540 /* Holds the registered mbuf dynamic flags names. */
541 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
542
543 /*
544  * Helper function to check if socket is already discovered.
545  * If yes, return positive value. If not, return zero.
546  */
547 int
548 new_socket_id(unsigned int socket_id)
549 {
550         unsigned int i;
551
552         for (i = 0; i < num_sockets; i++) {
553                 if (socket_ids[i] == socket_id)
554                         return 0;
555         }
556         return 1;
557 }
558
559 /*
560  * Setup default configuration.
561  */
562 static void
563 set_default_fwd_lcores_config(void)
564 {
565         unsigned int i;
566         unsigned int nb_lc;
567         unsigned int sock_num;
568
569         nb_lc = 0;
570         for (i = 0; i < RTE_MAX_LCORE; i++) {
571                 if (!rte_lcore_is_enabled(i))
572                         continue;
573                 sock_num = rte_lcore_to_socket_id(i);
574                 if (new_socket_id(sock_num)) {
575                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
576                                 rte_exit(EXIT_FAILURE,
577                                          "Total sockets greater than %u\n",
578                                          RTE_MAX_NUMA_NODES);
579                         }
580                         socket_ids[num_sockets++] = sock_num;
581                 }
582                 if (i == rte_get_main_lcore())
583                         continue;
584                 fwd_lcores_cpuids[nb_lc++] = i;
585         }
586         nb_lcores = (lcoreid_t) nb_lc;
587         nb_cfg_lcores = nb_lcores;
588         nb_fwd_lcores = 1;
589 }
590
591 static void
592 set_def_peer_eth_addrs(void)
593 {
594         portid_t i;
595
596         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
597                 peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
598                 peer_eth_addrs[i].addr_bytes[5] = i;
599         }
600 }
601
602 static void
603 set_default_fwd_ports_config(void)
604 {
605         portid_t pt_id;
606         int i = 0;
607
608         RTE_ETH_FOREACH_DEV(pt_id) {
609                 fwd_ports_ids[i++] = pt_id;
610
611                 /* Update sockets info according to the attached device */
612                 int socket_id = rte_eth_dev_socket_id(pt_id);
613                 if (socket_id >= 0 && new_socket_id(socket_id)) {
614                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
615                                 rte_exit(EXIT_FAILURE,
616                                          "Total sockets greater than %u\n",
617                                          RTE_MAX_NUMA_NODES);
618                         }
619                         socket_ids[num_sockets++] = socket_id;
620                 }
621         }
622
623         nb_cfg_ports = nb_ports;
624         nb_fwd_ports = nb_ports;
625 }
626
/* Build the default forwarding configuration: forwarding lcores, peer
 * Ethernet addresses and forwarding ports, in that order.
 */
void
set_def_fwd_config(void)
{
	set_default_fwd_lcores_config();
	set_def_peer_eth_addrs();
	set_default_fwd_ports_config();
}
634
635 /* extremely pessimistic estimation of memory required to create a mempool */
636 static int
637 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
638 {
639         unsigned int n_pages, mbuf_per_pg, leftover;
640         uint64_t total_mem, mbuf_mem, obj_sz;
641
642         /* there is no good way to predict how much space the mempool will
643          * occupy because it will allocate chunks on the fly, and some of those
644          * will come from default DPDK memory while some will come from our
645          * external memory, so just assume 128MB will be enough for everyone.
646          */
647         uint64_t hdr_mem = 128 << 20;
648
649         /* account for possible non-contiguousness */
650         obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
651         if (obj_sz > pgsz) {
652                 TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
653                 return -1;
654         }
655
656         mbuf_per_pg = pgsz / obj_sz;
657         leftover = (nb_mbufs % mbuf_per_pg) > 0;
658         n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
659
660         mbuf_mem = n_pages * pgsz;
661
662         total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
663
664         if (total_mem > SIZE_MAX) {
665                 TESTPMD_LOG(ERR, "Memory size too big\n");
666                 return -1;
667         }
668         *out = (size_t)total_mem;
669
670         return 0;
671 }
672
673 static int
674 pagesz_flags(uint64_t page_sz)
675 {
676         /* as per mmap() manpage, all page sizes are log2 of page size
677          * shifted by MAP_HUGE_SHIFT
678          */
679         int log2 = rte_log2_u64(page_sz);
680
681         return (log2 << HUGE_SHIFT);
682 }
683
684 static void *
685 alloc_mem(size_t memsz, size_t pgsz, bool huge)
686 {
687         void *addr;
688         int flags;
689
690         /* allocate anonymous hugepages */
691         flags = MAP_ANONYMOUS | MAP_PRIVATE;
692         if (huge)
693                 flags |= HUGE_FLAG | pagesz_flags(pgsz);
694
695         addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
696         if (addr == MAP_FAILED)
697                 return NULL;
698
699         return addr;
700 }
701
/* Description of an externally allocated memory area. */
struct extmem_param {
	void *addr;          /**< base virtual address of the area */
	size_t len;          /**< total length of the area, in bytes */
	size_t pgsz;         /**< page size the area was allocated with */
	rte_iova_t *iova_table;      /**< per-page IOVA table, malloc()'ed by
				      *   create_extmem(); freed by the user
				      *   of the structure */
	unsigned int iova_table_len; /**< number of entries in iova_table */
};
709
/*
 * Allocate an anonymous memory area big enough for nb_mbufs objects of
 * mbuf_sz bytes and collect the IOVA address of every page in it.
 *
 * Each supported page size is tried in turn until an allocation succeeds.
 * On success, *param receives the area's address, length, page size and a
 * malloc()'ed per-page IOVA table (ownership passes to the caller).
 * Returns 0 on success, -1 on failure.
 */
static int
create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
		bool huge)
{
	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
	unsigned int cur_page, n_pages, pgsz_idx;
	size_t mem_sz, cur_pgsz;
	rte_iova_t *iovas = NULL;
	void *addr;
	int ret;

	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
		/* skip anything that is too big (only relevant on 32-bit) */
		if (pgsizes[pgsz_idx] > SIZE_MAX)
			continue;

		cur_pgsz = pgsizes[pgsz_idx];

		/* if we were told not to allocate hugepages, override */
		if (!huge)
			cur_pgsz = sysconf(_SC_PAGESIZE);

		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
		if (ret < 0) {
			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
			return -1;
		}

		/* allocate our memory */
		addr = alloc_mem(mem_sz, cur_pgsz, huge);

		/* if we couldn't allocate memory with a specified page size,
		 * that doesn't mean we can't do it with other page sizes, so
		 * try another one.
		 */
		if (addr == NULL)
			continue;

		/* store IOVA addresses for every page in this memory area */
		n_pages = mem_sz / cur_pgsz;

		iovas = malloc(sizeof(*iovas) * n_pages);

		if (iovas == NULL) {
			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
			goto fail;
		}
		/* lock memory if it's not huge pages */
		if (!huge)
			mlock(addr, mem_sz);

		/* populate IOVA addresses */
		for (cur_page = 0; cur_page < n_pages; cur_page++) {
			rte_iova_t iova;
			size_t offset;
			void *cur;

			offset = cur_pgsz * cur_page;
			cur = RTE_PTR_ADD(addr, offset);

			/* touch the page before getting its IOVA, so the
			 * mapping is actually backed by a physical page
			 */
			*(volatile char *)cur = 0;

			iova = rte_mem_virt2iova(cur);

			iovas[cur_page] = iova;
		}

		/* success with this page size: keep addr/mem_sz/cur_pgsz */
		break;
	}
	/* if we couldn't allocate anything */
	if (iovas == NULL)
		return -1;

	param->addr = addr;
	param->len = mem_sz;
	param->pgsz = cur_pgsz;
	param->iova_table = iovas;
	param->iova_table_len = n_pages;

	return 0;
fail:
	if (iovas)
		free(iovas);
	if (addr)
		munmap(addr, mem_sz);

	return -1;
}
800
801 static int
802 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
803 {
804         struct extmem_param param;
805         int socket_id, ret;
806
807         memset(&param, 0, sizeof(param));
808
809         /* check if our heap exists */
810         socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
811         if (socket_id < 0) {
812                 /* create our heap */
813                 ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
814                 if (ret < 0) {
815                         TESTPMD_LOG(ERR, "Cannot create heap\n");
816                         return -1;
817                 }
818         }
819
820         ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
821         if (ret < 0) {
822                 TESTPMD_LOG(ERR, "Cannot create memory area\n");
823                 return -1;
824         }
825
826         /* we now have a valid memory area, so add it to heap */
827         ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
828                         param.addr, param.len, param.iova_table,
829                         param.iova_table_len, param.pgsz);
830
831         /* when using VFIO, memory is automatically mapped for DMA by EAL */
832
833         /* not needed any more */
834         free(param.iova_table);
835
836         if (ret < 0) {
837                 TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
838                 munmap(param.addr, param.len);
839                 return -1;
840         }
841
842         /* success */
843
844         TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
845                         param.len >> 20);
846
847         return 0;
848 }
849 static void
850 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
851              struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
852 {
853         uint16_t pid = 0;
854         int ret;
855
856         RTE_ETH_FOREACH_DEV(pid) {
857                 struct rte_eth_dev *dev =
858                         &rte_eth_devices[pid];
859
860                 ret = rte_dev_dma_unmap(dev->device, memhdr->addr, 0,
861                                         memhdr->len);
862                 if (ret) {
863                         TESTPMD_LOG(DEBUG,
864                                     "unable to DMA unmap addr 0x%p "
865                                     "for device %s\n",
866                                     memhdr->addr, dev->data->name);
867                 }
868         }
869         ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
870         if (ret) {
871                 TESTPMD_LOG(DEBUG,
872                             "unable to un-register addr 0x%p\n", memhdr->addr);
873         }
874 }
875
876 static void
877 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
878            struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
879 {
880         uint16_t pid = 0;
881         size_t page_size = sysconf(_SC_PAGESIZE);
882         int ret;
883
884         ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
885                                   page_size);
886         if (ret) {
887                 TESTPMD_LOG(DEBUG,
888                             "unable to register addr 0x%p\n", memhdr->addr);
889                 return;
890         }
891         RTE_ETH_FOREACH_DEV(pid) {
892                 struct rte_eth_dev *dev =
893                         &rte_eth_devices[pid];
894
895                 ret = rte_dev_dma_map(dev->device, memhdr->addr, 0,
896                                       memhdr->len);
897                 if (ret) {
898                         TESTPMD_LOG(DEBUG,
899                                     "unable to DMA map addr 0x%p "
900                                     "for device %s\n",
901                                     memhdr->addr, dev->data->name);
902                 }
903         }
904 }
905
/*
 * Reserve IOVA-contiguous memzones to serve as external buffers for the
 * mbufs of pool @pool_name, enough for nb_mbufs elements of mbuf_sz bytes.
 *
 * On success, *ext_mem points to a malloc()'ed array of descriptors (owned
 * by the caller) and the number of descriptors is returned. On failure, 0 is
 * returned, *ext_mem is NULL and errno is set (ENAMETOOLONG or ENOMEM).
 */
static unsigned int
setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
{
	struct rte_pktmbuf_extmem *xmem;
	unsigned int ext_num, zone_num, elt_num;
	uint16_t elt_size;

	/* elements per zone, rounding the zone count up */
	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
	elt_num = EXTBUF_ZONE_SIZE / elt_size;
	zone_num = (nb_mbufs + elt_num - 1) / elt_num;

	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
	if (xmem == NULL) {
		TESTPMD_LOG(ERR, "Cannot allocate memory for "
				 "external buffer descriptors\n");
		*ext_mem = NULL;
		return 0;
	}
	for (ext_num = 0; ext_num < zone_num; ext_num++) {
		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
		const struct rte_memzone *mz;
		char mz_name[RTE_MEMZONE_NAMESIZE];
		int ret;

		ret = snprintf(mz_name, sizeof(mz_name),
			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
			/* ext_num == 0 signals failure to the code below */
			errno = ENAMETOOLONG;
			ext_num = 0;
			break;
		}
		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
						 socket_id,
						 RTE_MEMZONE_IOVA_CONTIG |
						 RTE_MEMZONE_1GB |
						 RTE_MEMZONE_SIZE_HINT_ONLY,
						 EXTBUF_ZONE_SIZE);
		if (mz == NULL) {
			/*
			 * The caller exits on external buffer creation
			 * error, so there is no need to free memzones.
			 */
			errno = ENOMEM;
			ext_num = 0;
			break;
		}
		/* describe the reserved zone as one external buffer segment */
		xseg->buf_ptr = mz->addr;
		xseg->buf_iova = mz->iova;
		xseg->buf_len = EXTBUF_ZONE_SIZE;
		xseg->elt_size = elt_size;
	}
	if (ext_num == 0 && xmem != NULL) {
		free(xmem);
		xmem = NULL;
	}
	*ext_mem = xmem;
	return ext_num;
}
965
/*
 * Configuration initialisation done once at init time.
 * Create the mbuf pool named for (socket_id, size_idx) using the
 * allocation mode selected on the command line (mp_alloc_type).
 * Exits the application on failure; returns the new mempool otherwise.
 */
static struct rte_mempool *
mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
		 unsigned int socket_id, uint16_t size_idx)
{
	char pool_name[RTE_MEMPOOL_NAMESIZE];
	struct rte_mempool *rte_mp = NULL;
	uint32_t mb_size;

	/* Full object size: mbuf header plus the data segment. */
	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);

	TESTPMD_LOG(INFO,
		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
		pool_name, nb_mbuf, mbuf_seg_size, socket_id);

	switch (mp_alloc_type) {
	case MP_ALLOC_NATIVE:
		{
			/* wrapper to rte_mempool_create() */
			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
					rte_mbuf_best_mempool_ops());
			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
			break;
		}
	case MP_ALLOC_ANON:
		{
			/*
			 * Build the pool by hand: create an empty mempool,
			 * then back it with anonymous memory mappings.
			 */
			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
				mb_size, (unsigned int) mb_mempool_cache,
				sizeof(struct rte_pktmbuf_pool_private),
				socket_id, mempool_flags);
			if (rte_mp == NULL)
				goto err;

			/* populate_anon() returns 0 on failure. */
			if (rte_mempool_populate_anon(rte_mp) == 0) {
				rte_mempool_free(rte_mp);
				rte_mp = NULL;
				goto err;
			}
			rte_pktmbuf_pool_init(rte_mp, NULL);
			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
			/* presumably maps the anon memory for device DMA —
			 * TODO confirm dma_map_cb semantics at its definition.
			 */
			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
			break;
		}
	case MP_ALLOC_XMEM:
	case MP_ALLOC_XMEM_HUGE:
		{
			int heap_socket;
			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;

			/* Allocate from a user-created external memory heap. */
			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
				rte_exit(EXIT_FAILURE, "Could not create external memory\n");

			heap_socket =
				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
			if (heap_socket < 0)
				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");

			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
					rte_mbuf_best_mempool_ops());
			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
					mb_mempool_cache, 0, mbuf_seg_size,
					heap_socket);
			break;
		}
	case MP_ALLOC_XBUF:
		{
			struct rte_pktmbuf_extmem *ext_mem;
			unsigned int ext_num;

			/* Pin mbuf data in external buffers (memzones). */
			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
					       socket_id, pool_name, &ext_mem);
			if (ext_num == 0)
				rte_exit(EXIT_FAILURE,
					 "Can't create pinned data buffers\n");

			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
					rte_mbuf_best_mempool_ops());
			rte_mp = rte_pktmbuf_pool_create_extbuf
					(pool_name, nb_mbuf, mb_mempool_cache,
					 0, mbuf_seg_size, socket_id,
					 ext_mem, ext_num);
			/* Descriptor array was malloc'ed by setup_extbuf(). */
			free(ext_mem);
			break;
		}
	default:
		{
			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
		}
	}

	/* All successful cases fall through here as well; only a NULL
	 * pool is treated as fatal.
	 */
err:
	if (rte_mp == NULL) {
		rte_exit(EXIT_FAILURE,
			"Creation of mbuf pool for socket %u failed: %s\n",
			socket_id, rte_strerror(rte_errno));
	} else if (verbose_level > 0) {
		rte_mempool_dump(stdout, rte_mp);
	}
	return rte_mp;
}
1070
1071 /*
1072  * Check given socket id is valid or not with NUMA mode,
1073  * if valid, return 0, else return -1
1074  */
1075 static int
1076 check_socket_id(const unsigned int socket_id)
1077 {
1078         static int warning_once = 0;
1079
1080         if (new_socket_id(socket_id)) {
1081                 if (!warning_once && numa_support)
1082                         printf("Warning: NUMA should be configured manually by"
1083                                " using --port-numa-config and"
1084                                " --ring-numa-config parameters along with"
1085                                " --numa.\n");
1086                 warning_once = 1;
1087                 return -1;
1088         }
1089         return 0;
1090 }
1091
1092 /*
1093  * Get the allowed maximum number of RX queues.
1094  * *pid return the port id which has minimal value of
1095  * max_rx_queues in all ports.
1096  */
1097 queueid_t
1098 get_allowed_max_nb_rxq(portid_t *pid)
1099 {
1100         queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1101         bool max_rxq_valid = false;
1102         portid_t pi;
1103         struct rte_eth_dev_info dev_info;
1104
1105         RTE_ETH_FOREACH_DEV(pi) {
1106                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1107                         continue;
1108
1109                 max_rxq_valid = true;
1110                 if (dev_info.max_rx_queues < allowed_max_rxq) {
1111                         allowed_max_rxq = dev_info.max_rx_queues;
1112                         *pid = pi;
1113                 }
1114         }
1115         return max_rxq_valid ? allowed_max_rxq : 0;
1116 }
1117
1118 /*
1119  * Check input rxq is valid or not.
1120  * If input rxq is not greater than any of maximum number
1121  * of RX queues of all ports, it is valid.
1122  * if valid, return 0, else return -1
1123  */
1124 int
1125 check_nb_rxq(queueid_t rxq)
1126 {
1127         queueid_t allowed_max_rxq;
1128         portid_t pid = 0;
1129
1130         allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1131         if (rxq > allowed_max_rxq) {
1132                 printf("Fail: input rxq (%u) can't be greater "
1133                        "than max_rx_queues (%u) of port %u\n",
1134                        rxq,
1135                        allowed_max_rxq,
1136                        pid);
1137                 return -1;
1138         }
1139         return 0;
1140 }
1141
1142 /*
1143  * Get the allowed maximum number of TX queues.
1144  * *pid return the port id which has minimal value of
1145  * max_tx_queues in all ports.
1146  */
1147 queueid_t
1148 get_allowed_max_nb_txq(portid_t *pid)
1149 {
1150         queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1151         bool max_txq_valid = false;
1152         portid_t pi;
1153         struct rte_eth_dev_info dev_info;
1154
1155         RTE_ETH_FOREACH_DEV(pi) {
1156                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1157                         continue;
1158
1159                 max_txq_valid = true;
1160                 if (dev_info.max_tx_queues < allowed_max_txq) {
1161                         allowed_max_txq = dev_info.max_tx_queues;
1162                         *pid = pi;
1163                 }
1164         }
1165         return max_txq_valid ? allowed_max_txq : 0;
1166 }
1167
1168 /*
1169  * Check input txq is valid or not.
1170  * If input txq is not greater than any of maximum number
1171  * of TX queues of all ports, it is valid.
1172  * if valid, return 0, else return -1
1173  */
1174 int
1175 check_nb_txq(queueid_t txq)
1176 {
1177         queueid_t allowed_max_txq;
1178         portid_t pid = 0;
1179
1180         allowed_max_txq = get_allowed_max_nb_txq(&pid);
1181         if (txq > allowed_max_txq) {
1182                 printf("Fail: input txq (%u) can't be greater "
1183                        "than max_tx_queues (%u) of port %u\n",
1184                        txq,
1185                        allowed_max_txq,
1186                        pid);
1187                 return -1;
1188         }
1189         return 0;
1190 }
1191
1192 /*
1193  * Get the allowed maximum number of RXDs of every rx queue.
1194  * *pid return the port id which has minimal value of
1195  * max_rxd in all queues of all ports.
1196  */
1197 static uint16_t
1198 get_allowed_max_nb_rxd(portid_t *pid)
1199 {
1200         uint16_t allowed_max_rxd = UINT16_MAX;
1201         portid_t pi;
1202         struct rte_eth_dev_info dev_info;
1203
1204         RTE_ETH_FOREACH_DEV(pi) {
1205                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1206                         continue;
1207
1208                 if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1209                         allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1210                         *pid = pi;
1211                 }
1212         }
1213         return allowed_max_rxd;
1214 }
1215
1216 /*
1217  * Get the allowed minimal number of RXDs of every rx queue.
1218  * *pid return the port id which has minimal value of
1219  * min_rxd in all queues of all ports.
1220  */
1221 static uint16_t
1222 get_allowed_min_nb_rxd(portid_t *pid)
1223 {
1224         uint16_t allowed_min_rxd = 0;
1225         portid_t pi;
1226         struct rte_eth_dev_info dev_info;
1227
1228         RTE_ETH_FOREACH_DEV(pi) {
1229                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1230                         continue;
1231
1232                 if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1233                         allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1234                         *pid = pi;
1235                 }
1236         }
1237
1238         return allowed_min_rxd;
1239 }
1240
1241 /*
1242  * Check input rxd is valid or not.
1243  * If input rxd is not greater than any of maximum number
1244  * of RXDs of every Rx queues and is not less than any of
1245  * minimal number of RXDs of every Rx queues, it is valid.
1246  * if valid, return 0, else return -1
1247  */
1248 int
1249 check_nb_rxd(queueid_t rxd)
1250 {
1251         uint16_t allowed_max_rxd;
1252         uint16_t allowed_min_rxd;
1253         portid_t pid = 0;
1254
1255         allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1256         if (rxd > allowed_max_rxd) {
1257                 printf("Fail: input rxd (%u) can't be greater "
1258                        "than max_rxds (%u) of port %u\n",
1259                        rxd,
1260                        allowed_max_rxd,
1261                        pid);
1262                 return -1;
1263         }
1264
1265         allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1266         if (rxd < allowed_min_rxd) {
1267                 printf("Fail: input rxd (%u) can't be less "
1268                        "than min_rxds (%u) of port %u\n",
1269                        rxd,
1270                        allowed_min_rxd,
1271                        pid);
1272                 return -1;
1273         }
1274
1275         return 0;
1276 }
1277
1278 /*
1279  * Get the allowed maximum number of TXDs of every rx queues.
1280  * *pid return the port id which has minimal value of
1281  * max_txd in every tx queue.
1282  */
1283 static uint16_t
1284 get_allowed_max_nb_txd(portid_t *pid)
1285 {
1286         uint16_t allowed_max_txd = UINT16_MAX;
1287         portid_t pi;
1288         struct rte_eth_dev_info dev_info;
1289
1290         RTE_ETH_FOREACH_DEV(pi) {
1291                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1292                         continue;
1293
1294                 if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1295                         allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1296                         *pid = pi;
1297                 }
1298         }
1299         return allowed_max_txd;
1300 }
1301
1302 /*
1303  * Get the allowed maximum number of TXDs of every tx queues.
1304  * *pid return the port id which has minimal value of
1305  * min_txd in every tx queue.
1306  */
1307 static uint16_t
1308 get_allowed_min_nb_txd(portid_t *pid)
1309 {
1310         uint16_t allowed_min_txd = 0;
1311         portid_t pi;
1312         struct rte_eth_dev_info dev_info;
1313
1314         RTE_ETH_FOREACH_DEV(pi) {
1315                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1316                         continue;
1317
1318                 if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1319                         allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1320                         *pid = pi;
1321                 }
1322         }
1323
1324         return allowed_min_txd;
1325 }
1326
1327 /*
1328  * Check input txd is valid or not.
1329  * If input txd is not greater than any of maximum number
1330  * of TXDs of every Rx queues, it is valid.
1331  * if valid, return 0, else return -1
1332  */
1333 int
1334 check_nb_txd(queueid_t txd)
1335 {
1336         uint16_t allowed_max_txd;
1337         uint16_t allowed_min_txd;
1338         portid_t pid = 0;
1339
1340         allowed_max_txd = get_allowed_max_nb_txd(&pid);
1341         if (txd > allowed_max_txd) {
1342                 printf("Fail: input txd (%u) can't be greater "
1343                        "than max_txds (%u) of port %u\n",
1344                        txd,
1345                        allowed_max_txd,
1346                        pid);
1347                 return -1;
1348         }
1349
1350         allowed_min_txd = get_allowed_min_nb_txd(&pid);
1351         if (txd < allowed_min_txd) {
1352                 printf("Fail: input txd (%u) can't be less "
1353                        "than min_txds (%u) of port %u\n",
1354                        txd,
1355                        allowed_min_txd,
1356                        pid);
1357                 return -1;
1358         }
1359         return 0;
1360 }
1361
1362
1363 /*
1364  * Get the allowed maximum number of hairpin queues.
1365  * *pid return the port id which has minimal value of
1366  * max_hairpin_queues in all ports.
1367  */
1368 queueid_t
1369 get_allowed_max_nb_hairpinq(portid_t *pid)
1370 {
1371         queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1372         portid_t pi;
1373         struct rte_eth_hairpin_cap cap;
1374
1375         RTE_ETH_FOREACH_DEV(pi) {
1376                 if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1377                         *pid = pi;
1378                         return 0;
1379                 }
1380                 if (cap.max_nb_queues < allowed_max_hairpinq) {
1381                         allowed_max_hairpinq = cap.max_nb_queues;
1382                         *pid = pi;
1383                 }
1384         }
1385         return allowed_max_hairpinq;
1386 }
1387
1388 /*
1389  * Check input hairpin is valid or not.
1390  * If input hairpin is not greater than any of maximum number
1391  * of hairpin queues of all ports, it is valid.
1392  * if valid, return 0, else return -1
1393  */
1394 int
1395 check_nb_hairpinq(queueid_t hairpinq)
1396 {
1397         queueid_t allowed_max_hairpinq;
1398         portid_t pid = 0;
1399
1400         allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1401         if (hairpinq > allowed_max_hairpinq) {
1402                 printf("Fail: input hairpin (%u) can't be greater "
1403                        "than max_hairpin_queues (%u) of port %u\n",
1404                        hairpinq, allowed_max_hairpinq, pid);
1405                 return -1;
1406         }
1407         return 0;
1408 }
1409
/*
 * One-time initialization of the whole forwarding configuration:
 * per-lcore contexts, per-port default config, mbuf pools,
 * forwarding streams and per-lcore GSO/GRO contexts.
 * Exits the application on any allocation or setup failure.
 */
static void
init_config(void)
{
	portid_t pid;
	struct rte_port *port;
	struct rte_mempool *mbp;
	unsigned int nb_mbuf_per_pool;
	lcoreid_t  lc_id;
	uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
	struct rte_gro_param gro_param;
	uint32_t gso_types;
	uint16_t data_size;
	bool warning = 0;
	int k;
	int ret;

	memset(port_per_socket,0,RTE_MAX_NUMA_NODES);

	/* Configuration of logical cores. */
	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
				sizeof(struct fwd_lcore *) * nb_lcores,
				RTE_CACHE_LINE_SIZE);
	if (fwd_lcores == NULL) {
		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
							"failed\n", nb_lcores);
	}
	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
					       sizeof(struct fwd_lcore),
					       RTE_CACHE_LINE_SIZE);
		if (fwd_lcores[lc_id] == NULL) {
			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
								"failed\n");
		}
		fwd_lcores[lc_id]->cpuid_idx = lc_id;
	}

	RTE_ETH_FOREACH_DEV(pid) {
		port = &ports[pid];
		/* Apply default TxRx configuration for all ports */
		port->dev_conf.txmode = tx_mode;
		port->dev_conf.rxmode = rx_mode;

		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				 "rte_eth_dev_info_get() failed\n");

		/* Non-fatal: keep going with the previous offload setup. */
		ret = update_jumbo_frame_offload(pid);
		if (ret != 0)
			printf("Updating jumbo frame offload failed for port %u\n",
				pid);

		/* Drop FAST_FREE from the Tx config when the device
		 * does not advertise it.
		 */
		if (!(port->dev_info.tx_offload_capa &
		      DEV_TX_OFFLOAD_MBUF_FAST_FREE))
			port->dev_conf.txmode.offloads &=
				~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
		if (numa_support) {
			/* Count ports per socket to size per-socket pools. */
			if (port_numa[pid] != NUMA_NO_CONFIG)
				port_per_socket[port_numa[pid]]++;
			else {
				uint32_t socket_id = rte_eth_dev_socket_id(pid);

				/*
				 * if socket_id is invalid,
				 * set to the first available socket.
				 */
				if (check_socket_id(socket_id) < 0)
					socket_id = socket_ids[0];
				port_per_socket[socket_id]++;
			}
		}

		/* Apply Rx offloads configuration */
		for (k = 0; k < port->dev_info.max_rx_queues; k++)
			port->rx_conf[k].offloads =
				port->dev_conf.rxmode.offloads;
		/* Apply Tx offloads configuration */
		for (k = 0; k < port->dev_info.max_tx_queues; k++)
			port->tx_conf[k].offloads =
				port->dev_conf.txmode.offloads;

		if (eth_link_speed)
			port->dev_conf.link_speeds = eth_link_speed;

		/* set flag to initialize port/queue */
		port->need_reconfig = 1;
		port->need_reconfig_queues = 1;
		port->tx_metadata = 0;

		/* Check for maximum number of segments per MTU. Accordingly
		 * update the mbuf data size.
		 */
		if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
				port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
			data_size = rx_mode.max_rx_pkt_len /
				port->dev_info.rx_desc_lim.nb_mtu_seg_max;

			if ((data_size + RTE_PKTMBUF_HEADROOM) >
							mbuf_data_size[0]) {
				mbuf_data_size[0] = data_size +
						 RTE_PKTMBUF_HEADROOM;
				warning = 1;
			}
		}
	}

	if (warning)
		TESTPMD_LOG(WARNING,
			    "Configured mbuf size of the first segment %hu\n",
			    mbuf_data_size[0]);
	/*
	 * Create pools of mbuf.
	 * If NUMA support is disabled, create a single pool of mbuf in
	 * socket 0 memory by default.
	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
	 *
	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
	 * nb_txd can be configured at run time.
	 */
	if (param_total_num_mbufs)
		nb_mbuf_per_pool = param_total_num_mbufs;
	else {
		/* Worst-case sizing: descriptors + caches + one burst,
		 * scaled by the maximum number of ports.
		 */
		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
			(nb_lcores * mb_mempool_cache) +
			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
	}

	if (numa_support) {
		uint8_t i, j;

		/* One pool per (socket, mbuf size) pair. */
		for (i = 0; i < num_sockets; i++)
			for (j = 0; j < mbuf_data_size_n; j++)
				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
					mbuf_pool_create(mbuf_data_size[j],
							  nb_mbuf_per_pool,
							  socket_ids[i], j);
	} else {
		uint8_t i;

		for (i = 0; i < mbuf_data_size_n; i++)
			mempools[i] = mbuf_pool_create
					(mbuf_data_size[i],
					 nb_mbuf_per_pool,
					 socket_num == UMA_NO_CONFIG ?
					 0 : socket_num, i);
	}

	init_port_config();

	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
	/*
	 * Records which Mbuf pool to use by each logical core, if needed.
	 */
	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
		mbp = mbuf_pool_find(
			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);

		/* Fall back to the socket-0 pool when the lcore's socket
		 * has none.
		 */
		if (mbp == NULL)
			mbp = mbuf_pool_find(0, 0);
		fwd_lcores[lc_id]->mbp = mbp;
		/* initialize GSO context */
		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
			RTE_ETHER_CRC_LEN;
		fwd_lcores[lc_id]->gso_ctx.flag = 0;
	}

	/* Configuration of packet forwarding streams. */
	if (init_fwd_streams() < 0)
		rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n");

	fwd_config_setup();

	/* create a gro context for each lcore */
	gro_param.gro_types = RTE_GRO_TCP_IPV4;
	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
	gro_param.max_item_per_flow = MAX_PKT_BURST;
	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
		gro_param.socket_id = rte_lcore_to_socket_id(
				fwd_lcores_cpuids[lc_id]);
		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
			rte_exit(EXIT_FAILURE,
					"rte_gro_ctx_create() failed\n");
		}
	}
}
1602
1603
1604 void
1605 reconfig(portid_t new_port_id, unsigned socket_id)
1606 {
1607         struct rte_port *port;
1608         int ret;
1609
1610         /* Reconfiguration of Ethernet ports. */
1611         port = &ports[new_port_id];
1612
1613         ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info);
1614         if (ret != 0)
1615                 return;
1616
1617         /* set flag to initialize port/queue */
1618         port->need_reconfig = 1;
1619         port->need_reconfig_queues = 1;
1620         port->socket_id = socket_id;
1621
1622         init_port_config();
1623 }
1624
1625
/*
 * (Re)allocate the global array of forwarding streams.
 * Validates nb_rxq/nb_txq against every port's limits, assigns each
 * port a socket id (NUMA-aware or forced), then rebuilds fwd_streams
 * sized nb_ports * max(nb_rxq, nb_txq) if the count changed.
 * Returns 0 on success, -1 on invalid configuration; exits on OOM.
 */
int
init_fwd_streams(void)
{
	portid_t pid;
	struct rte_port *port;
	streamid_t sm_id, nb_fwd_streams_new;
	queueid_t q;

	/* set socket id according to numa or not */
	RTE_ETH_FOREACH_DEV(pid) {
		port = &ports[pid];
		if (nb_rxq > port->dev_info.max_rx_queues) {
			printf("Fail: nb_rxq(%d) is greater than "
				"max_rx_queues(%d)\n", nb_rxq,
				port->dev_info.max_rx_queues);
			return -1;
		}
		if (nb_txq > port->dev_info.max_tx_queues) {
			printf("Fail: nb_txq(%d) is greater than "
				"max_tx_queues(%d)\n", nb_txq,
				port->dev_info.max_tx_queues);
			return -1;
		}
		if (numa_support) {
			if (port_numa[pid] != NUMA_NO_CONFIG)
				port->socket_id = port_numa[pid];
			else {
				port->socket_id = rte_eth_dev_socket_id(pid);

				/*
				 * if socket_id is invalid,
				 * set to the first available socket.
				 */
				if (check_socket_id(port->socket_id) < 0)
					port->socket_id = socket_ids[0];
			}
		}
		else {
			if (socket_num == UMA_NO_CONFIG)
				port->socket_id = 0;
			else
				port->socket_id = socket_num;
		}
	}

	/* One stream per queue, using the larger of the Rx/Tx counts. */
	q = RTE_MAX(nb_rxq, nb_txq);
	if (q == 0) {
		printf("Fail: Cannot allocate fwd streams as number of queues is 0\n");
		return -1;
	}
	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
	/* Nothing to do when the stream count is unchanged. */
	if (nb_fwd_streams_new == nb_fwd_streams)
		return 0;
	/* clear the old */
	if (fwd_streams != NULL) {
		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
			if (fwd_streams[sm_id] == NULL)
				continue;
			rte_free(fwd_streams[sm_id]);
			fwd_streams[sm_id] = NULL;
		}
		rte_free(fwd_streams);
		fwd_streams = NULL;
	}

	/* init new */
	nb_fwd_streams = nb_fwd_streams_new;
	if (nb_fwd_streams) {
		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
			sizeof(struct fwd_stream *) * nb_fwd_streams,
			RTE_CACHE_LINE_SIZE);
		if (fwd_streams == NULL)
			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
				 " (struct fwd_stream *)) failed\n",
				 nb_fwd_streams);

		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
				" struct fwd_stream", sizeof(struct fwd_stream),
				RTE_CACHE_LINE_SIZE);
			if (fwd_streams[sm_id] == NULL)
				rte_exit(EXIT_FAILURE, "rte_zmalloc"
					 "(struct fwd_stream) failed\n");
		}
	}

	return 0;
}
1714
/*
 * Print a summary of the burst-size histogram in pbs: the share of
 * zero-sized bursts, the two most frequent non-zero burst sizes, and
 * a remainder bucket. Prints nothing when no bursts were recorded.
 */
static void
pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
{
	uint64_t total_burst, sburst;
	uint64_t nb_burst;
	uint64_t burst_stats[4];
	uint16_t pktnb_stats[4];
	uint16_t nb_pkt;
	int burst_percent[4], sburstp;
	int i;

	/*
	 * First compute the total number of packet bursts and the
	 * two highest numbers of bursts of the same number of packets.
	 */
	memset(&burst_stats, 0x0, sizeof(burst_stats));
	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));

	/* Show stats for 0 burst size always */
	total_burst = pbs->pkt_burst_spread[0];
	burst_stats[0] = pbs->pkt_burst_spread[0];
	pktnb_stats[0] = 0;

	/* Find the next 2 burst sizes with highest occurrences. */
	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
		nb_burst = pbs->pkt_burst_spread[nb_pkt];

		if (nb_burst == 0)
			continue;

		total_burst += nb_burst;

		/* Keep slots 1 and 2 as the top-2 counts seen so far. */
		if (nb_burst > burst_stats[1]) {
			burst_stats[2] = burst_stats[1];
			pktnb_stats[2] = pktnb_stats[1];
			burst_stats[1] = nb_burst;
			pktnb_stats[1] = nb_pkt;
		} else if (nb_burst > burst_stats[2]) {
			burst_stats[2] = nb_burst;
			pktnb_stats[2] = nb_pkt;
		}
	}
	if (total_burst == 0)
		return;

	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
	/* sburst accumulates counts, sburstp accumulates percents. */
	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
		/* Slot 3 is the catch-all "other" bucket. */
		if (i == 3) {
			printf("%d%% of other]\n", 100 - sburstp);
			return;
		}

		sburst += burst_stats[i];
		/* Stop early once the listed buckets cover everything. */
		if (sburst == total_burst) {
			printf("%d%% of %d pkts]\n",
				100 - sburstp, (int) pktnb_stats[i]);
			return;
		}

		burst_percent[i] =
			(double)burst_stats[i] / total_burst * 100;
		printf("%d%% of %d pkts + ",
			burst_percent[i], (int) pktnb_stats[i]);
		sburstp += burst_percent[i];
	}
}
1781
/*
 * Display the software counters of forwarding stream @stream_id.
 * Idle streams (no Rx, no Tx, no drops) are silently skipped.
 * Checksum error counters are only printed while the "csum" forwarding
 * engine is active, and the burst-size spread only when burst-stats
 * recording is enabled.
 */
static void
fwd_stream_stats_display(streamid_t stream_id)
{
	struct fwd_stream *fs;
	static const char *fwd_top_stats_border = "-------";

	fs = fwd_streams[stream_id];
	/* Nothing to report for an idle stream. */
	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
	    (fs->fwd_dropped == 0))
		return;
	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
	       "TX Port=%2d/Queue=%2d %s\n",
	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
	       " TX-dropped: %-14"PRIu64,
	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);

	/* if checksum mode */
	if (cur_fwd_eng == &csum_fwd_engine) {
		printf("  RX- bad IP checksum: %-14"PRIu64
		       "  Rx- bad L4 checksum: %-14"PRIu64
		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
			fs->rx_bad_outer_l4_csum);
		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
			fs->rx_bad_outer_ip_csum);
	} else {
		printf("\n");
	}

	if (record_burst_stats) {
		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
	}
}
1818
/*
 * Display the accumulated forwarding statistics.
 *
 * Per-stream stats are printed individually when there are more streams
 * than forwarding ports; otherwise stream counters are aggregated per
 * port.  Device counters are reported as deltas against the snapshot
 * taken by fwd_stats_reset(), and grand totals over all forwarding
 * ports close the report (plus a cycles/packet figure when core-cycle
 * recording is enabled).
 */
void
fwd_stats_display(void)
{
	static const char *fwd_stats_border = "----------------------";
	static const char *acc_stats_border = "+++++++++++++++";
	/* Per-port aggregation of the software stream counters. */
	struct {
		struct fwd_stream *rx_stream;
		struct fwd_stream *tx_stream;
		uint64_t tx_dropped;
		uint64_t rx_bad_ip_csum;
		uint64_t rx_bad_l4_csum;
		uint64_t rx_bad_outer_l4_csum;
		uint64_t rx_bad_outer_ip_csum;
	} ports_stats[RTE_MAX_ETHPORTS];
	uint64_t total_rx_dropped = 0;
	uint64_t total_tx_dropped = 0;
	uint64_t total_rx_nombuf = 0;
	struct rte_eth_stats stats;
	uint64_t fwd_cycles = 0;
	uint64_t total_recv = 0;
	uint64_t total_xmit = 0;
	struct rte_port *port;
	streamid_t sm_id;
	portid_t pt_id;
	int i;

	memset(ports_stats, 0, sizeof(ports_stats));

	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
		struct fwd_stream *fs = fwd_streams[sm_id];

		if (cur_fwd_config.nb_fwd_streams >
		    cur_fwd_config.nb_fwd_ports) {
			/* Several streams per port: show each stream. */
			fwd_stream_stats_display(sm_id);
		} else {
			/* One stream per port: remember it for the per-port
			 * burst-stats display below. */
			ports_stats[fs->tx_port].tx_stream = fs;
			ports_stats[fs->rx_port].rx_stream = fs;
		}

		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;

		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
				fs->rx_bad_outer_l4_csum;
		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
				fs->rx_bad_outer_ip_csum;

		if (record_core_cycles)
			fwd_cycles += fs->core_cycles;
	}
	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
		pt_id = fwd_ports_ids[i];
		port = &ports[pt_id];

		/* Report deltas against the fwd_stats_reset() snapshot. */
		rte_eth_stats_get(pt_id, &stats);
		stats.ipackets -= port->stats.ipackets;
		stats.opackets -= port->stats.opackets;
		stats.ibytes -= port->stats.ibytes;
		stats.obytes -= port->stats.obytes;
		stats.imissed -= port->stats.imissed;
		stats.oerrors -= port->stats.oerrors;
		stats.rx_nombuf -= port->stats.rx_nombuf;

		total_recv += stats.ipackets;
		total_xmit += stats.opackets;
		total_rx_dropped += stats.imissed;
		total_tx_dropped += ports_stats[pt_id].tx_dropped;
		total_tx_dropped += stats.oerrors;
		total_rx_nombuf  += stats.rx_nombuf;

		printf("\n  %s Forward statistics for port %-2d %s\n",
		       fwd_stats_border, pt_id, fwd_stats_border);

		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
		       stats.ipackets + stats.imissed);

		/* Checksum error counters only apply to the "csum" engine. */
		if (cur_fwd_eng == &csum_fwd_engine) {
			printf("  Bad-ipcsum: %-14"PRIu64
			       " Bad-l4csum: %-14"PRIu64
			       "Bad-outer-l4csum: %-14"PRIu64"\n",
			       ports_stats[pt_id].rx_bad_ip_csum,
			       ports_stats[pt_id].rx_bad_l4_csum,
			       ports_stats[pt_id].rx_bad_outer_l4_csum);
			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
			       ports_stats[pt_id].rx_bad_outer_ip_csum);
		}
		if (stats.ierrors + stats.rx_nombuf > 0) {
			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
		}

		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
		       "TX-total: %-"PRIu64"\n",
		       stats.opackets, ports_stats[pt_id].tx_dropped,
		       stats.opackets + ports_stats[pt_id].tx_dropped);

		if (record_burst_stats) {
			if (ports_stats[pt_id].rx_stream)
				pkt_burst_stats_display("RX",
					&ports_stats[pt_id].rx_stream->rx_burst_stats);
			if (ports_stats[pt_id].tx_stream)
				pkt_burst_stats_display("TX",
				&ports_stats[pt_id].tx_stream->tx_burst_stats);
		}

		printf("  %s--------------------------------%s\n",
		       fwd_stats_border, fwd_stats_border);
	}

	printf("\n  %s Accumulated forward statistics for all ports"
	       "%s\n",
	       acc_stats_border, acc_stats_border);
	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
	       "%-"PRIu64"\n"
	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
	       "%-"PRIu64"\n",
	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
	if (total_rx_nombuf > 0)
		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
	       "%s\n",
	       acc_stats_border, acc_stats_border);
	if (record_core_cycles) {
#define CYC_PER_MHZ 1E6
		if (total_recv > 0 || total_xmit > 0) {
			uint64_t total_pkts = 0;
			/* txonly/flowgen generate traffic: rate against Tx;
			 * all other engines are driven by Rx. */
			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
				total_pkts = total_xmit;
			else
				total_pkts = total_recv;

			printf("\n  CPU cycles/packet=%.2F (total cycles="
			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
			       " MHz Clock\n",
			       (double) fwd_cycles / total_pkts,
			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
		}
	}
}
1963
1964 void
1965 fwd_stats_reset(void)
1966 {
1967         streamid_t sm_id;
1968         portid_t pt_id;
1969         int i;
1970
1971         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1972                 pt_id = fwd_ports_ids[i];
1973                 rte_eth_stats_get(pt_id, &ports[pt_id].stats);
1974         }
1975         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1976                 struct fwd_stream *fs = fwd_streams[sm_id];
1977
1978                 fs->rx_packets = 0;
1979                 fs->tx_packets = 0;
1980                 fs->fwd_dropped = 0;
1981                 fs->rx_bad_ip_csum = 0;
1982                 fs->rx_bad_l4_csum = 0;
1983                 fs->rx_bad_outer_l4_csum = 0;
1984                 fs->rx_bad_outer_ip_csum = 0;
1985
1986                 memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
1987                 memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
1988                 fs->core_cycles = 0;
1989         }
1990 }
1991
1992 static void
1993 flush_fwd_rx_queues(void)
1994 {
1995         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1996         portid_t  rxp;
1997         portid_t port_id;
1998         queueid_t rxq;
1999         uint16_t  nb_rx;
2000         uint16_t  i;
2001         uint8_t   j;
2002         uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2003         uint64_t timer_period;
2004
2005         /* convert to number of cycles */
2006         timer_period = rte_get_timer_hz(); /* 1 second timeout */
2007
2008         for (j = 0; j < 2; j++) {
2009                 for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2010                         for (rxq = 0; rxq < nb_rxq; rxq++) {
2011                                 port_id = fwd_ports_ids[rxp];
2012                                 /**
2013                                 * testpmd can stuck in the below do while loop
2014                                 * if rte_eth_rx_burst() always returns nonzero
2015                                 * packets. So timer is added to exit this loop
2016                                 * after 1sec timer expiry.
2017                                 */
2018                                 prev_tsc = rte_rdtsc();
2019                                 do {
2020                                         nb_rx = rte_eth_rx_burst(port_id, rxq,
2021                                                 pkts_burst, MAX_PKT_BURST);
2022                                         for (i = 0; i < nb_rx; i++)
2023                                                 rte_pktmbuf_free(pkts_burst[i]);
2024
2025                                         cur_tsc = rte_rdtsc();
2026                                         diff_tsc = cur_tsc - prev_tsc;
2027                                         timer_tsc += diff_tsc;
2028                                 } while ((nb_rx > 0) &&
2029                                         (timer_tsc < timer_period));
2030                                 timer_tsc = 0;
2031                         }
2032                 }
2033                 rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
2034         }
2035 }
2036
/*
 * Main forwarding loop of one lcore: repeatedly invoke @pkt_fwd on
 * every stream assigned to @fc until fc->stopped is set by another
 * thread.  When built with the corresponding libraries, the designated
 * lcore also refreshes bitrate statistics once per second and latency
 * statistics on every pass.
 */
static void
run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
{
	struct fwd_stream **fsm;
	streamid_t nb_fs;
	streamid_t sm_id;
#ifdef RTE_LIB_BITRATESTATS
	uint64_t tics_per_1sec;
	uint64_t tics_datum;
	uint64_t tics_current;
	uint16_t i, cnt_ports;

	cnt_ports = nb_ports;
	tics_datum = rte_rdtsc();
	tics_per_1sec = rte_get_timer_hz();
#endif
	/* The streams handled by this lcore form a contiguous slice
	 * of the global fwd_streams array. */
	fsm = &fwd_streams[fc->stream_idx];
	nb_fs = fc->stream_nb;
	do {
		for (sm_id = 0; sm_id < nb_fs; sm_id++)
			(*pkt_fwd)(fsm[sm_id]);
#ifdef RTE_LIB_BITRATESTATS
		if (bitrate_enabled != 0 &&
				bitrate_lcore_id == rte_lcore_id()) {
			tics_current = rte_rdtsc();
			/* Recompute at most once per second. */
			if (tics_current - tics_datum >= tics_per_1sec) {
				/* Periodic bitrate calculation */
				for (i = 0; i < cnt_ports; i++)
					rte_stats_bitrate_calc(bitrate_data,
						ports_ids[i]);
				tics_datum = tics_current;
			}
		}
#endif
#ifdef RTE_LIB_LATENCYSTATS
		if (latencystats_enabled != 0 &&
				latencystats_lcore_id == rte_lcore_id())
			rte_latencystats_update();
#endif

	} while (! fc->stopped);
}
2079
2080 static int
2081 start_pkt_forward_on_core(void *fwd_arg)
2082 {
2083         run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2084                              cur_fwd_config.fwd_eng->packet_fwd);
2085         return 0;
2086 }
2087
2088 /*
2089  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2090  * Used to start communication flows in network loopback test configurations.
2091  */
2092 static int
2093 run_one_txonly_burst_on_core(void *fwd_arg)
2094 {
2095         struct fwd_lcore *fwd_lc;
2096         struct fwd_lcore tmp_lcore;
2097
2098         fwd_lc = (struct fwd_lcore *) fwd_arg;
2099         tmp_lcore = *fwd_lc;
2100         tmp_lcore.stopped = 1;
2101         run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2102         return 0;
2103 }
2104
2105 /*
2106  * Launch packet forwarding:
2107  *     - Setup per-port forwarding context.
2108  *     - launch logical cores with their forwarding configuration.
2109  */
2110 static void
2111 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2112 {
2113         port_fwd_begin_t port_fwd_begin;
2114         unsigned int i;
2115         unsigned int lc_id;
2116         int diag;
2117
2118         port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2119         if (port_fwd_begin != NULL) {
2120                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2121                         (*port_fwd_begin)(fwd_ports_ids[i]);
2122         }
2123         for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2124                 lc_id = fwd_lcores_cpuids[i];
2125                 if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2126                         fwd_lcores[i]->stopped = 0;
2127                         diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2128                                                      fwd_lcores[i], lc_id);
2129                         if (diag != 0)
2130                                 printf("launch lcore %u failed - diag=%d\n",
2131                                        lc_id, diag);
2132                 }
2133         }
2134 }
2135
/*
 * Launch packet forwarding configuration.
 *
 * Validates that the current forwarding engine has the queues it needs
 * and that all ports are started, optionally primes loopback setups
 * with @with_tx_first bursts of TXONLY traffic, then launches the
 * configured engine on every forwarding lcore.
 */
void
start_packet_forwarding(int with_tx_first)
{
	port_fwd_begin_t port_fwd_begin;
	port_fwd_end_t  port_fwd_end;
	struct rte_port *port;
	unsigned int i;
	portid_t   pt_id;

	/* rxonly needs Rx queues, txonly needs Tx queues, every other
	 * engine needs both. */
	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");

	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");

	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
		(!nb_rxq || !nb_txq))
		rte_exit(EXIT_FAILURE,
			"Either rxq or txq are 0, cannot use %s fwd mode\n",
			cur_fwd_eng->fwd_mode_name);

	if (all_ports_started() == 0) {
		printf("Not all ports were started\n");
		return;
	}
	if (test_done == 0) {
		printf("Packet forwarding already started\n");
		return;
	}


	if(dcb_test) {
		/* DCB requires every forwarding port to be configured in
		 * DCB mode and more than one forwarding core. */
		for (i = 0; i < nb_fwd_ports; i++) {
			pt_id = fwd_ports_ids[i];
			port = &ports[pt_id];
			if (!port->dcb_flag) {
				printf("In DCB mode, all forwarding ports must "
				       "be configured in this mode.\n");
				return;
			}
		}
		if (nb_fwd_lcores == 1) {
			printf("In DCB mode,the nb forwarding cores "
			       "should be larger than 1.\n");
			return;
		}
	}
	test_done = 0;

	fwd_config_setup();

	if(!no_flush_rx)
		flush_fwd_rx_queues();

	pkt_fwd_config_display(&cur_fwd_config);
	rxtx_config_display();

	fwd_stats_reset();
	if (with_tx_first) {
		/* Prime the loopback: send with_tx_first single bursts of
		 * TXONLY traffic (with its begin/end hooks) before starting
		 * the configured engine. */
		port_fwd_begin = tx_only_engine.port_fwd_begin;
		if (port_fwd_begin != NULL) {
			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
				(*port_fwd_begin)(fwd_ports_ids[i]);
		}
		while (with_tx_first--) {
			launch_packet_forwarding(
					run_one_txonly_burst_on_core);
			rte_eal_mp_wait_lcore();
		}
		port_fwd_end = tx_only_engine.port_fwd_end;
		if (port_fwd_end != NULL) {
			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
				(*port_fwd_end)(fwd_ports_ids[i]);
		}
	}
	launch_packet_forwarding(start_pkt_forward_on_core);
}
2217
2218 void
2219 stop_packet_forwarding(void)
2220 {
2221         port_fwd_end_t port_fwd_end;
2222         lcoreid_t lc_id;
2223         portid_t pt_id;
2224         int i;
2225
2226         if (test_done) {
2227                 printf("Packet forwarding not started\n");
2228                 return;
2229         }
2230         printf("Telling cores to stop...");
2231         for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2232                 fwd_lcores[lc_id]->stopped = 1;
2233         printf("\nWaiting for lcores to finish...\n");
2234         rte_eal_mp_wait_lcore();
2235         port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2236         if (port_fwd_end != NULL) {
2237                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2238                         pt_id = fwd_ports_ids[i];
2239                         (*port_fwd_end)(pt_id);
2240                 }
2241         }
2242
2243         fwd_stats_display();
2244
2245         printf("\nDone.\n");
2246         test_done = 1;
2247 }
2248
2249 void
2250 dev_set_link_up(portid_t pid)
2251 {
2252         if (rte_eth_dev_set_link_up(pid) < 0)
2253                 printf("\nSet link up fail.\n");
2254 }
2255
2256 void
2257 dev_set_link_down(portid_t pid)
2258 {
2259         if (rte_eth_dev_set_link_down(pid) < 0)
2260                 printf("\nSet link down fail.\n");
2261 }
2262
2263 static int
2264 all_ports_started(void)
2265 {
2266         portid_t pi;
2267         struct rte_port *port;
2268
2269         RTE_ETH_FOREACH_DEV(pi) {
2270                 port = &ports[pi];
2271                 /* Check if there is a port which is not started */
2272                 if ((port->port_status != RTE_PORT_STARTED) &&
2273                         (port->slave_flag == 0))
2274                         return 0;
2275         }
2276
2277         /* No port is not started */
2278         return 1;
2279 }
2280
2281 int
2282 port_is_stopped(portid_t port_id)
2283 {
2284         struct rte_port *port = &ports[port_id];
2285
2286         if ((port->port_status != RTE_PORT_STOPPED) &&
2287             (port->slave_flag == 0))
2288                 return 0;
2289         return 1;
2290 }
2291
2292 int
2293 all_ports_stopped(void)
2294 {
2295         portid_t pi;
2296
2297         RTE_ETH_FOREACH_DEV(pi) {
2298                 if (!port_is_stopped(pi))
2299                         return 0;
2300         }
2301
2302         return 1;
2303 }
2304
2305 int
2306 port_is_started(portid_t port_id)
2307 {
2308         if (port_id_is_invalid(port_id, ENABLED_WARN))
2309                 return 0;
2310
2311         if (ports[port_id].port_status != RTE_PORT_STARTED)
2312                 return 0;
2313
2314         return 1;
2315 }
2316
/* Configure the Rx and Tx hairpin queues for the selected port. */
/*
 * The low nibble of the global hairpin_mode selects the peer-port
 * topology (0: each port hairpins to itself with implicit binding;
 * 0x1: ports are chained, the last looping back to the first;
 * 0x2: ports are bound in pairs), and bit 0x10 requests explicit
 * Tx flow mode.  @p_pi is the previously configured port
 * (RTE_MAX_ETHPORTS when none) and @cnt_pi the number of ports
 * configured so far.  Returns 0 on success; on failure returns -1
 * and flags the port's queues for reconfiguration.
 */
static int
setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
{
	queueid_t qi;
	struct rte_eth_hairpin_conf hairpin_conf = {
		.peer_count = 1,
	};
	int i;
	int diag;
	struct rte_port *port = &ports[pi];
	uint16_t peer_rx_port = pi;
	uint16_t peer_tx_port = pi;
	uint32_t manual = 1;
	uint32_t tx_exp = hairpin_mode & 0x10;

	if (!(hairpin_mode & 0xf)) {
		/* Default: hairpin to self, implicit (auto) binding. */
		peer_rx_port = pi;
		peer_tx_port = pi;
		manual = 0;
	} else if (hairpin_mode & 0x1) {
		/* Chain the ports: Tx peer is the next port (wrapping to
		 * the first), Rx peer is the previously configured one. */
		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
						       RTE_ETH_DEV_NO_OWNER);
		if (peer_tx_port >= RTE_MAX_ETHPORTS)
			peer_tx_port = rte_eth_find_next_owned_by(0,
						RTE_ETH_DEV_NO_OWNER);
		if (p_pi != RTE_MAX_ETHPORTS) {
			peer_rx_port = p_pi;
		} else {
			uint16_t next_pi;

			/* Last port will be the peer RX port of the first. */
			RTE_ETH_FOREACH_DEV(next_pi)
				peer_rx_port = next_pi;
		}
		manual = 1;
	} else if (hairpin_mode & 0x2) {
		/* Pair the ports: odd-count ports peer with the previous
		 * port, even-count ports with the next one. */
		if (cnt_pi & 0x1) {
			peer_rx_port = p_pi;
		} else {
			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
						RTE_ETH_DEV_NO_OWNER);
			if (peer_rx_port >= RTE_MAX_ETHPORTS)
				peer_rx_port = pi;
		}
		peer_tx_port = peer_rx_port;
		manual = 1;
	}

	/* Hairpin Tx queues are appended after the regular Tx queues. */
	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
		hairpin_conf.peers[0].port = peer_rx_port;
		hairpin_conf.peers[0].queue = i + nb_rxq;
		hairpin_conf.manual_bind = !!manual;
		hairpin_conf.tx_explicit = !!tx_exp;
		diag = rte_eth_tx_hairpin_queue_setup
			(pi, qi, nb_txd, &hairpin_conf);
		i++;
		if (diag == 0)
			continue;

		/* Fail to setup tx queue, return */
		if (rte_atomic16_cmpset(&(port->port_status),
					RTE_PORT_HANDLING,
					RTE_PORT_STOPPED) == 0)
			printf("Port %d can not be set back "
					"to stopped\n", pi);
		printf("Fail to configure port %d hairpin "
				"queues\n", pi);
		/* try to reconfigure queues next time */
		port->need_reconfig_queues = 1;
		return -1;
	}
	/* Hairpin Rx queues are appended after the regular Rx queues. */
	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
		hairpin_conf.peers[0].port = peer_tx_port;
		hairpin_conf.peers[0].queue = i + nb_txq;
		hairpin_conf.manual_bind = !!manual;
		hairpin_conf.tx_explicit = !!tx_exp;
		diag = rte_eth_rx_hairpin_queue_setup
			(pi, qi, nb_rxd, &hairpin_conf);
		i++;
		if (diag == 0)
			continue;

		/* Fail to setup rx queue, return */
		if (rte_atomic16_cmpset(&(port->port_status),
					RTE_PORT_HANDLING,
					RTE_PORT_STOPPED) == 0)
			printf("Port %d can not be set back "
					"to stopped\n", pi);
		printf("Fail to configure port %d hairpin "
				"queues\n", pi);
		/* try to reconfigure queues next time */
		port->need_reconfig_queues = 1;
		return -1;
	}
	return 0;
}
2414
2415 /* Configure the Rx with optional split. */
2416 int
2417 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2418                uint16_t nb_rx_desc, unsigned int socket_id,
2419                struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2420 {
2421         union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2422         unsigned int i, mp_n;
2423         int ret;
2424
2425         if (rx_pkt_nb_segs <= 1 ||
2426             (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2427                 rx_conf->rx_seg = NULL;
2428                 rx_conf->rx_nseg = 0;
2429                 ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2430                                              nb_rx_desc, socket_id,
2431                                              rx_conf, mp);
2432                 return ret;
2433         }
2434         for (i = 0; i < rx_pkt_nb_segs; i++) {
2435                 struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2436                 struct rte_mempool *mpx;
2437                 /*
2438                  * Use last valid pool for the segments with number
2439                  * exceeding the pool index.
2440                  */
2441                 mp_n = (i > mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2442                 mpx = mbuf_pool_find(socket_id, mp_n);
2443                 /* Handle zero as mbuf data buffer size. */
2444                 rx_seg->length = rx_pkt_seg_lengths[i] ?
2445                                    rx_pkt_seg_lengths[i] :
2446                                    mbuf_data_size[mp_n];
2447                 rx_seg->offset = i < rx_pkt_nb_offs ?
2448                                    rx_pkt_seg_offsets[i] : 0;
2449                 rx_seg->mp = mpx ? mpx : mp;
2450         }
2451         rx_conf->rx_nseg = rx_pkt_nb_segs;
2452         rx_conf->rx_seg = rx_useg;
2453         ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2454                                     socket_id, rx_conf, NULL);
2455         rx_conf->rx_seg = NULL;
2456         rx_conf->rx_nseg = 0;
2457         return ret;
2458 }
2459
/*
 * Start one port, or every port when pid == RTE_PORT_ALL.
 *
 * For each selected port this (re)configures the device and its Rx/Tx
 * queues if flagged as needing reconfiguration, then starts it. Port
 * status is moved STOPPED -> HANDLING -> STARTED via atomic compare-set
 * so concurrent state changes are detected. After all ports are started,
 * link status is optionally polled and hairpin peer ports are bound.
 *
 * Returns 0 on success (including "nothing to do"), -1 on any
 * configuration/queue-setup/bind failure.
 */
int
start_port(portid_t pid)
{
	int diag, need_check_link_status = -1;
	portid_t pi;
	portid_t p_pi = RTE_MAX_ETHPORTS;	/* previous port in the loop, for hairpin setup */
	portid_t pl[RTE_MAX_ETHPORTS];		/* list of ports actually started */
	portid_t peer_pl[RTE_MAX_ETHPORTS];
	uint16_t cnt_pi = 0;			/* number of ports visited */
	uint16_t cfg_pi = 0;			/* number of ports started (entries in pl[]) */
	int peer_pi;
	queueid_t qi;
	struct rte_port *port;
	struct rte_ether_addr mac_addr;
	struct rte_eth_hairpin_cap cap;

	if (port_id_is_invalid(pid, ENABLED_WARN))
		return 0;

	if(dcb_config)
		dcb_test = 1;
	RTE_ETH_FOREACH_DEV(pi) {
		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
			continue;

		need_check_link_status = 0;
		port = &ports[pi];
		/* claim the port: only a STOPPED port may be started */
		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
						 RTE_PORT_HANDLING) == 0) {
			printf("Port %d is now not stopped\n", pi);
			continue;
		}

		if (port->need_reconfig > 0) {
			port->need_reconfig = 0;

			if (flow_isolate_all) {
				int ret = port_flow_isolate(pi, 1);
				if (ret) {
					printf("Failed to apply isolated"
					       " mode on port %d\n", pi);
					return -1;
				}
			}
			configure_rxtx_dump_callbacks(0);
			printf("Configuring Port %d (socket %u)\n", pi,
					port->socket_id);
			/* hairpin queues require driver support */
			if (nb_hairpinq > 0 &&
			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
				printf("Port %d doesn't support hairpin "
				       "queues\n", pi);
				return -1;
			}
			/* configure port */
			diag = rte_eth_dev_configure(pi, nb_rxq + nb_hairpinq,
						     nb_txq + nb_hairpinq,
						     &(port->dev_conf));
			if (diag != 0) {
				if (rte_atomic16_cmpset(&(port->port_status),
				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
					printf("Port %d can not be set back "
							"to stopped\n", pi);
				printf("Fail to configure port %d\n", pi);
				/* try to reconfigure port next time */
				port->need_reconfig = 1;
				return -1;
			}
		}
		if (port->need_reconfig_queues > 0) {
			port->need_reconfig_queues = 0;
			/* setup tx queues */
			for (qi = 0; qi < nb_txq; qi++) {
				/* prefer the user-requested NUMA node for the ring */
				if ((numa_support) &&
					(txring_numa[pi] != NUMA_NO_CONFIG))
					diag = rte_eth_tx_queue_setup(pi, qi,
						port->nb_tx_desc[qi],
						txring_numa[pi],
						&(port->tx_conf[qi]));
				else
					diag = rte_eth_tx_queue_setup(pi, qi,
						port->nb_tx_desc[qi],
						port->socket_id,
						&(port->tx_conf[qi]));

				if (diag == 0)
					continue;

				/* Fail to setup tx queue, return */
				if (rte_atomic16_cmpset(&(port->port_status),
							RTE_PORT_HANDLING,
							RTE_PORT_STOPPED) == 0)
					printf("Port %d can not be set back "
							"to stopped\n", pi);
				printf("Fail to configure port %d tx queues\n",
				       pi);
				/* try to reconfigure queues next time */
				port->need_reconfig_queues = 1;
				return -1;
			}
			for (qi = 0; qi < nb_rxq; qi++) {
				/* setup rx queues */
				if ((numa_support) &&
					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
					/* mbuf pool on the user-requested socket */
					struct rte_mempool * mp =
						mbuf_pool_find
							(rxring_numa[pi], 0);
					if (mp == NULL) {
						printf("Failed to setup RX queue:"
							"No mempool allocation"
							" on the socket %d\n",
							rxring_numa[pi]);
						return -1;
					}

					diag = rx_queue_setup(pi, qi,
					     port->nb_rx_desc[qi],
					     rxring_numa[pi],
					     &(port->rx_conf[qi]),
					     mp);
				} else {
					/* fall back to the port's own socket */
					struct rte_mempool *mp =
						mbuf_pool_find
							(port->socket_id, 0);
					if (mp == NULL) {
						printf("Failed to setup RX queue:"
							"No mempool allocation"
							" on the socket %d\n",
							port->socket_id);
						return -1;
					}
					diag = rx_queue_setup(pi, qi,
					     port->nb_rx_desc[qi],
					     port->socket_id,
					     &(port->rx_conf[qi]),
					     mp);
				}
				if (diag == 0)
					continue;

				/* Fail to setup rx queue, return */
				if (rte_atomic16_cmpset(&(port->port_status),
							RTE_PORT_HANDLING,
							RTE_PORT_STOPPED) == 0)
					printf("Port %d can not be set back "
							"to stopped\n", pi);
				printf("Fail to configure port %d rx queues\n",
				       pi);
				/* try to reconfigure queues next time */
				port->need_reconfig_queues = 1;
				return -1;
			}
			/* setup hairpin queues */
			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
				return -1;
		}
		configure_rxtx_dump_callbacks(verbose_level);
		if (clear_ptypes) {
			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
					NULL, 0);
			if (diag < 0)
				printf(
				"Port %d: Failed to disable Ptype parsing\n",
				pi);
		}

		p_pi = pi;
		cnt_pi++;

		/* start port */
		diag = rte_eth_dev_start(pi);
		if (diag < 0) {
			printf("Fail to start port %d: %s\n", pi,
			       rte_strerror(-diag));

			/* Fail to setup rx queue, return */
			if (rte_atomic16_cmpset(&(port->port_status),
				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
				printf("Port %d can not be set back to "
							"stopped\n", pi);
			continue;
		}

		if (rte_atomic16_cmpset(&(port->port_status),
			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
			printf("Port %d can not be set into started\n", pi);

		if (eth_macaddr_get_print_err(pi, &mac_addr) == 0)
			printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi,
				mac_addr.addr_bytes[0], mac_addr.addr_bytes[1],
				mac_addr.addr_bytes[2], mac_addr.addr_bytes[3],
				mac_addr.addr_bytes[4], mac_addr.addr_bytes[5]);

		/* at least one port started, need checking link status */
		need_check_link_status = 1;

		pl[cfg_pi++] = pi;
	}

	if (need_check_link_status == 1 && !no_link_check)
		check_all_ports_link_status(RTE_PORT_ALL);
	else if (need_check_link_status == 0)
		printf("Please stop the ports first\n");

	/* hairpin_mode low nibble selects explicit Tx/Rx binding */
	if (hairpin_mode & 0xf) {
		uint16_t i;
		int j;

		/* bind all started hairpin ports */
		for (i = 0; i < cfg_pi; i++) {
			pi = pl[i];
			/* bind current Tx to all peer Rx */
			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
							RTE_MAX_ETHPORTS, 1);
			if (peer_pi < 0)
				return peer_pi;
			for (j = 0; j < peer_pi; j++) {
				if (!port_is_started(peer_pl[j]))
					continue;
				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
				if (diag < 0) {
					printf("Error during binding hairpin"
					       " Tx port %u to %u: %s\n",
					       pi, peer_pl[j],
					       rte_strerror(-diag));
					return -1;
				}
			}
			/* bind all peer Tx to current Rx */
			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
							RTE_MAX_ETHPORTS, 0);
			if (peer_pi < 0)
				return peer_pi;
			for (j = 0; j < peer_pi; j++) {
				if (!port_is_started(peer_pl[j]))
					continue;
				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
				if (diag < 0) {
					printf("Error during binding hairpin"
					       " Tx port %u to %u: %s\n",
					       peer_pl[j], pi,
					       rte_strerror(-diag));
					return -1;
				}
			}
		}
	}

	printf("Done\n");
	return 0;
}
2710
/*
 * Stop one port, or every port when pid == RTE_PORT_ALL.
 *
 * Ports still used for forwarding or acting as bonding slaves are
 * skipped with a message. For each stoppable port the teardown order
 * is: unbind hairpin peers, flush flow rules, stop the device, then
 * mark the port STOPPED. Status moves STARTED -> HANDLING -> STOPPED
 * via atomic compare-set.
 */
void
stop_port(portid_t pid)
{
	portid_t pi;
	struct rte_port *port;
	int need_check_link_status = 0;
	portid_t peer_pl[RTE_MAX_ETHPORTS];
	int peer_pi;

	/* leaving DCB test mode clears the DCB configuration flag too */
	if (dcb_test) {
		dcb_test = 0;
		dcb_config = 0;
	}

	if (port_id_is_invalid(pid, ENABLED_WARN))
		return;

	printf("Stopping ports...\n");

	RTE_ETH_FOREACH_DEV(pi) {
		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
			continue;

		if (port_is_forwarding(pi) != 0 && test_done == 0) {
			printf("Please remove port %d from forwarding configuration.\n", pi);
			continue;
		}

		if (port_is_bonding_slave(pi)) {
			printf("Please remove port %d from bonded device.\n", pi);
			continue;
		}

		port = &ports[pi];
		/* only a STARTED port can be stopped; skip others silently */
		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
						RTE_PORT_HANDLING) == 0)
			continue;

		if (hairpin_mode & 0xf) {
			int j;

			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
			/* unbind all peer Tx from current Rx */
			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
							RTE_MAX_ETHPORTS, 0);
			if (peer_pi < 0)
				continue;
			for (j = 0; j < peer_pi; j++) {
				if (!port_is_started(peer_pl[j]))
					continue;
				rte_eth_hairpin_unbind(peer_pl[j], pi);
			}
		}

		/* drop installed flow rules before stopping the device */
		if (port->flow_list)
			port_flow_flush(pi);

		if (rte_eth_dev_stop(pi) != 0)
			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
				pi);

		if (rte_atomic16_cmpset(&(port->port_status),
			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
			printf("Port %d can not be set into stopped\n", pi);
		need_check_link_status = 1;
	}
	if (need_check_link_status && !no_link_check)
		check_all_ports_link_status(RTE_PORT_ALL);

	printf("Done\n");
}
2782
2783 static void
2784 remove_invalid_ports_in(portid_t *array, portid_t *total)
2785 {
2786         portid_t i;
2787         portid_t new_total = 0;
2788
2789         for (i = 0; i < *total; i++)
2790                 if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2791                         array[new_total] = array[i];
2792                         new_total++;
2793                 }
2794         *total = new_total;
2795 }
2796
/*
 * Purge ports that disappeared (e.g. after detach) from the global
 * port list and the forwarding port list, then resync the configured
 * port count with the forwarding port count.
 */
static void
remove_invalid_ports(void)
{
	remove_invalid_ports_in(ports_ids, &nb_ports);
	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
	/* configured ports track forwarding ports */
	nb_cfg_ports = nb_fwd_ports;
}
2804
2805 void
2806 close_port(portid_t pid)
2807 {
2808         portid_t pi;
2809         struct rte_port *port;
2810
2811         if (port_id_is_invalid(pid, ENABLED_WARN))
2812                 return;
2813
2814         printf("Closing ports...\n");
2815
2816         RTE_ETH_FOREACH_DEV(pi) {
2817                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2818                         continue;
2819
2820                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2821                         printf("Please remove port %d from forwarding configuration.\n", pi);
2822                         continue;
2823                 }
2824
2825                 if (port_is_bonding_slave(pi)) {
2826                         printf("Please remove port %d from bonded device.\n", pi);
2827                         continue;
2828                 }
2829
2830                 port = &ports[pi];
2831                 if (rte_atomic16_cmpset(&(port->port_status),
2832                         RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2833                         printf("Port %d is already closed\n", pi);
2834                         continue;
2835                 }
2836
2837                 port_flow_flush(pi);
2838                 rte_eth_dev_close(pi);
2839         }
2840
2841         remove_invalid_ports();
2842         printf("Done\n");
2843 }
2844
2845 void
2846 reset_port(portid_t pid)
2847 {
2848         int diag;
2849         portid_t pi;
2850         struct rte_port *port;
2851
2852         if (port_id_is_invalid(pid, ENABLED_WARN))
2853                 return;
2854
2855         if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2856                 (pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2857                 printf("Can not reset port(s), please stop port(s) first.\n");
2858                 return;
2859         }
2860
2861         printf("Resetting ports...\n");
2862
2863         RTE_ETH_FOREACH_DEV(pi) {
2864                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2865                         continue;
2866
2867                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2868                         printf("Please remove port %d from forwarding "
2869                                "configuration.\n", pi);
2870                         continue;
2871                 }
2872
2873                 if (port_is_bonding_slave(pi)) {
2874                         printf("Please remove port %d from bonded device.\n",
2875                                pi);
2876                         continue;
2877                 }
2878
2879                 diag = rte_eth_dev_reset(pi);
2880                 if (diag == 0) {
2881                         port = &ports[pi];
2882                         port->need_reconfig = 1;
2883                         port->need_reconfig_queues = 1;
2884                 } else {
2885                         printf("Failed to reset port %d. diag=%d\n", pi, diag);
2886                 }
2887         }
2888
2889         printf("Done\n");
2890 }
2891
2892 void
2893 attach_port(char *identifier)
2894 {
2895         portid_t pi;
2896         struct rte_dev_iterator iterator;
2897
2898         printf("Attaching a new port...\n");
2899
2900         if (identifier == NULL) {
2901                 printf("Invalid parameters are specified\n");
2902                 return;
2903         }
2904
2905         if (rte_dev_probe(identifier) < 0) {
2906                 TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
2907                 return;
2908         }
2909
2910         /* first attach mode: event */
2911         if (setup_on_probe_event) {
2912                 /* new ports are detected on RTE_ETH_EVENT_NEW event */
2913                 for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
2914                         if (ports[pi].port_status == RTE_PORT_HANDLING &&
2915                                         ports[pi].need_setup != 0)
2916                                 setup_attached_port(pi);
2917                 return;
2918         }
2919
2920         /* second attach mode: iterator */
2921         RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
2922                 /* setup ports matching the devargs used for probing */
2923                 if (port_is_forwarding(pi))
2924                         continue; /* port was already attached before */
2925                 setup_attached_port(pi);
2926         }
2927 }
2928
2929 static void
2930 setup_attached_port(portid_t pi)
2931 {
2932         unsigned int socket_id;
2933         int ret;
2934
2935         socket_id = (unsigned)rte_eth_dev_socket_id(pi);
2936         /* if socket_id is invalid, set to the first available socket. */
2937         if (check_socket_id(socket_id) < 0)
2938                 socket_id = socket_ids[0];
2939         reconfig(pi, socket_id);
2940         ret = rte_eth_promiscuous_enable(pi);
2941         if (ret != 0)
2942                 printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
2943                         pi, rte_strerror(-ret));
2944
2945         ports_ids[nb_ports++] = pi;
2946         fwd_ports_ids[nb_fwd_ports++] = pi;
2947         nb_cfg_ports = nb_fwd_ports;
2948         ports[pi].need_setup = 0;
2949         ports[pi].port_status = RTE_PORT_STOPPED;
2950
2951         printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
2952         printf("Done\n");
2953 }
2954
2955 static void
2956 detach_device(struct rte_device *dev)
2957 {
2958         portid_t sibling;
2959
2960         if (dev == NULL) {
2961                 printf("Device already removed\n");
2962                 return;
2963         }
2964
2965         printf("Removing a device...\n");
2966
2967         RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
2968                 if (ports[sibling].port_status != RTE_PORT_CLOSED) {
2969                         if (ports[sibling].port_status != RTE_PORT_STOPPED) {
2970                                 printf("Port %u not stopped\n", sibling);
2971                                 return;
2972                         }
2973                         port_flow_flush(sibling);
2974                 }
2975         }
2976
2977         if (rte_dev_remove(dev) < 0) {
2978                 TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
2979                 return;
2980         }
2981         remove_invalid_ports();
2982
2983         printf("Device is detached\n");
2984         printf("Now total ports is %d\n", nb_ports);
2985         printf("Done\n");
2986         return;
2987 }
2988
2989 void
2990 detach_port_device(portid_t port_id)
2991 {
2992         if (port_id_is_invalid(port_id, ENABLED_WARN))
2993                 return;
2994
2995         if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2996                 if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2997                         printf("Port not stopped\n");
2998                         return;
2999                 }
3000                 printf("Port was not closed\n");
3001         }
3002
3003         detach_device(rte_eth_devices[port_id].device);
3004 }
3005
/*
 * Detach all devices matching the given devargs identifier string.
 *
 * Parses the identifier into rte_devargs, verifies that every matching
 * port is stopped (flushing flow rules on stopped-but-open ports), and
 * removes the device via the hotplug API. The parsed devargs are reset
 * on every exit path to release any memory they hold.
 */
void
detach_devargs(char *identifier)
{
	struct rte_dev_iterator iterator;
	struct rte_devargs da;
	portid_t port_id;

	printf("Removing a device...\n");

	memset(&da, 0, sizeof(da));
	if (rte_devargs_parsef(&da, "%s", identifier)) {
		printf("cannot parse identifier\n");
		return;
	}

	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
				printf("Port %u not stopped\n", port_id);
				/* abort: release the iterator and devargs */
				rte_eth_iterator_cleanup(&iterator);
				rte_devargs_reset(&da);
				return;
			}
			port_flow_flush(port_id);
		}
	}

	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
			    da.name, da.bus->name);
		rte_devargs_reset(&da);
		return;
	}

	remove_invalid_ports();

	printf("Device %s is detached\n", identifier);
	printf("Now total ports is %d\n", nb_ports);
	printf("Done\n");
	rte_devargs_reset(&da);
}
3047
/*
 * Orderly testpmd shutdown: stop forwarding, DMA-unmap anonymous
 * mempools, stop then close every port, tear down hot-plug monitoring
 * (if enabled), and finally free all mempools.
 *
 * Mempool freeing is deliberately last so ports can still reference
 * mbufs while being stopped/closed.
 */
void
pmd_test_exit(void)
{
	portid_t pt_id;
	unsigned int i;
	int ret;

	if (test_done == 0)
		stop_packet_forwarding();

	/* undo DMA mappings done for anonymous-memory mempools */
	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
		if (mempools[i]) {
			if (mp_alloc_type == MP_ALLOC_ANON)
				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
						     NULL);
		}
	}
	if (ports != NULL) {
		/* skip link-status polling during bulk shutdown */
		no_link_check = 1;
		RTE_ETH_FOREACH_DEV(pt_id) {
			printf("\nStopping port %d...\n", pt_id);
			fflush(stdout);
			stop_port(pt_id);
		}
		RTE_ETH_FOREACH_DEV(pt_id) {
			printf("\nShutting down port %d...\n", pt_id);
			fflush(stdout);
			close_port(pt_id);
		}
	}

	if (hot_plug) {
		ret = rte_dev_event_monitor_stop();
		if (ret) {
			RTE_LOG(ERR, EAL,
				"fail to stop device event monitor.");
			return;
		}

		ret = rte_dev_event_callback_unregister(NULL,
			dev_event_callback, NULL);
		if (ret < 0) {
			RTE_LOG(ERR, EAL,
				"fail to unregister device event callback.\n");
			return;
		}

		ret = rte_dev_hotplug_handle_disable();
		if (ret) {
			RTE_LOG(ERR, EAL,
				"fail to disable hotplug handling.\n");
			return;
		}
	}
	/* all ports are down: safe to release the mbuf pools */
	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
		if (mempools[i])
			rte_mempool_free(mempools[i]);
	}

	printf("\nBye...\n");
}
3109
/* Handler type for a named test command (no arguments, no result). */
typedef void (*cmd_func_t)(void);
/* Association of a command name with the function that implements it. */
struct pmd_test_command {
	const char *cmd_name;	/* command keyword */
	cmd_func_t cmd_func;	/* handler to invoke */
};
3115
/* Check the link status of all ports in up to 9s, and print them finally */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	portid_t portid;
	uint8_t count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;
	int ret;
	char link_status[RTE_ETH_LINK_MAX_STR_LEN];

	printf("Checking link statuses...\n");
	fflush(stdout);
	/*
	 * Poll until every masked port reports link up, the timeout
	 * expires, or LSC interrupts make polling unnecessary. The
	 * iteration after print_flag is set only prints the statuses.
	 */
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			ret = rte_eth_link_get_nowait(portid, &link);
			if (ret < 0) {
				all_ports_up = 0;
				if (print_flag == 1)
					printf("Port %u link get failed: %s\n",
						portid, rte_strerror(-ret));
				continue;
			}
			/* print link status if flag set */
			if (print_flag == 1) {
				rte_eth_link_to_str(link_status,
					sizeof(link_status), &link);
				printf("Port %d %s\n", portid, link_status);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		/* NOTE(review): timeout triggers at count == MAX_CHECK_TIME - 1
		 * although the loop bound is <= MAX_CHECK_TIME — presumably
		 * intentional so the final pass only prints; confirm upstream. */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
		}

		if (lsc_interrupt)
			break;
	}
}
3175
3176 static void
3177 rmv_port_callback(void *arg)
3178 {
3179         int need_to_start = 0;
3180         int org_no_link_check = no_link_check;
3181         portid_t port_id = (intptr_t)arg;
3182         struct rte_device *dev;
3183
3184         RTE_ETH_VALID_PORTID_OR_RET(port_id);
3185
3186         if (!test_done && port_is_forwarding(port_id)) {
3187                 need_to_start = 1;
3188                 stop_packet_forwarding();
3189         }
3190         no_link_check = 1;
3191         stop_port(port_id);
3192         no_link_check = org_no_link_check;
3193
3194         /* Save rte_device pointer before closing ethdev port */
3195         dev = rte_eth_devices[port_id].device;
3196         close_port(port_id);
3197         detach_device(dev); /* might be already removed or have more ports */
3198
3199         if (need_to_start)
3200                 start_packet_forwarding(0);
3201 }
3202
/* This function is used by the interrupt thread */
/*
 * Generic ethdev event callback registered for all ports and all event
 * types. Optionally logs the event (per event_print_mask), then reacts
 * to the events testpmd cares about: NEW (mark port for setup),
 * INTR_RMV (schedule deferred removal via an EAL alarm — the actual
 * teardown must not run in interrupt context), DESTROY (mark closed).
 * Always returns 0.
 */
static int
eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
		  void *ret_param)
{
	RTE_SET_USED(param);
	RTE_SET_USED(ret_param);

	if (type >= RTE_ETH_EVENT_MAX) {
		fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
			port_id, __func__, type);
		fflush(stderr);
	} else if (event_print_mask & (UINT32_C(1) << type)) {
		printf("\nPort %" PRIu16 ": %s event\n", port_id,
			eth_event_desc[type]);
		fflush(stdout);
	}

	switch (type) {
	case RTE_ETH_EVENT_NEW:
		/* finish setup later (see attach_port / setup_attached_port) */
		ports[port_id].need_setup = 1;
		ports[port_id].port_status = RTE_PORT_HANDLING;
		break;
	case RTE_ETH_EVENT_INTR_RMV:
		if (port_id_is_invalid(port_id, DISABLED_WARN))
			break;
		/* defer removal out of interrupt context (100 ms alarm) */
		if (rte_eal_alarm_set(100000,
				rmv_port_callback, (void *)(intptr_t)port_id))
			fprintf(stderr, "Could not set up deferred device removal\n");
		break;
	case RTE_ETH_EVENT_DESTROY:
		ports[port_id].port_status = RTE_PORT_CLOSED;
		printf("Port %u is closed\n", port_id);
		break;
	default:
		break;
	}
	return 0;
}
3242
3243 static int
3244 register_eth_event_callback(void)
3245 {
3246         int ret;
3247         enum rte_eth_event_type event;
3248
3249         for (event = RTE_ETH_EVENT_UNKNOWN;
3250                         event < RTE_ETH_EVENT_MAX; event++) {
3251                 ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3252                                 event,
3253                                 eth_event_callback,
3254                                 NULL);
3255                 if (ret != 0) {
3256                         TESTPMD_LOG(ERR, "Failed to register callback for "
3257                                         "%s event\n", eth_event_desc[event]);
3258                         return -1;
3259                 }
3260         }
3261
3262         return 0;
3263 }
3264
/* This function is used by the interrupt thread */
/*
 * EAL device (hotplug) event handler: on removal, schedule a deferred
 * detach of the matching ethdev port; on addition, only log (attach is
 * still a TODO). Unknown event types are reported and then ignored by
 * the switch's default branch.
 */
static void
dev_event_callback(const char *device_name, enum rte_dev_event_type type,
			     __rte_unused void *arg)
{
	uint16_t port_id;
	int ret;

	if (type >= RTE_DEV_EVENT_MAX) {
		fprintf(stderr, "%s called upon invalid event %d\n",
			__func__, type);
		fflush(stderr);
	}

	switch (type) {
	case RTE_DEV_EVENT_REMOVE:
		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
			device_name);
		/* Map the device name back to its ethdev port id. */
		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
		if (ret) {
			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
				device_name);
			return;
		}
		/*
		 * Because the user's callback is invoked in eal interrupt
		 * callback, the interrupt callback need to be finished before
		 * it can be unregistered when detaching device. So finish
		 * callback soon and use a deferred removal to detach device
		 * is need. It is a workaround, once the device detaching be
		 * moved into the eal in the future, the deferred removal could
		 * be deleted.
		 */
		if (rte_eal_alarm_set(100000,
				rmv_port_callback, (void *)(intptr_t)port_id))
			RTE_LOG(ERR, EAL,
				"Could not set up deferred device removal\n");
		break;
	case RTE_DEV_EVENT_ADD:
		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
			device_name);
		/* TODO: After finish kernel driver binding,
		 * begin to attach port.
		 */
		break;
	default:
		break;
	}
}
3314
3315 static void
3316 rxtx_port_config(struct rte_port *port)
3317 {
3318         uint16_t qid;
3319         uint64_t offloads;
3320
3321         for (qid = 0; qid < nb_rxq; qid++) {
3322                 offloads = port->rx_conf[qid].offloads;
3323                 port->rx_conf[qid] = port->dev_info.default_rxconf;
3324                 if (offloads != 0)
3325                         port->rx_conf[qid].offloads = offloads;
3326
3327                 /* Check if any Rx parameters have been passed */
3328                 if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3329                         port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3330
3331                 if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3332                         port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3333
3334                 if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3335                         port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3336
3337                 if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3338                         port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3339
3340                 if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3341                         port->rx_conf[qid].rx_drop_en = rx_drop_en;
3342
3343                 port->nb_rx_desc[qid] = nb_rxd;
3344         }
3345
3346         for (qid = 0; qid < nb_txq; qid++) {
3347                 offloads = port->tx_conf[qid].offloads;
3348                 port->tx_conf[qid] = port->dev_info.default_txconf;
3349                 if (offloads != 0)
3350                         port->tx_conf[qid].offloads = offloads;
3351
3352                 /* Check if any Tx parameters have been passed */
3353                 if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3354                         port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3355
3356                 if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3357                         port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3358
3359                 if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3360                         port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3361
3362                 if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3363                         port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3364
3365                 if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3366                         port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3367
3368                 port->nb_tx_desc[qid] = nb_txd;
3369         }
3370 }
3371
/*
 * Helper function to arrange max_rx_pktlen value and JUMBO_FRAME offload,
 * MTU is also aligned if JUMBO_FRAME offload is not set.
 *
 * port->dev_info should be set before calling this function.
 *
 * return 0 on success, negative on error
 */
int
update_jumbo_frame_offload(portid_t portid)
{
	struct rte_port *port = &ports[portid];
	uint32_t eth_overhead;	/* L2 header+CRC overhead on top of the MTU */
	uint64_t rx_offloads;
	int ret;
	bool on;		/* whether JUMBO_FRAME ends up enabled */

	/* Update the max_rx_pkt_len to have MTU as RTE_ETHER_MTU */
	/* Prefer the PMD-reported overhead (max_rx_pktlen - max_mtu); fall
	 * back to the standard Ethernet header + CRC when the PMD does not
	 * report a usable max_mtu.
	 */
	if (port->dev_info.max_mtu != UINT16_MAX &&
	    port->dev_info.max_rx_pktlen > port->dev_info.max_mtu)
		eth_overhead = port->dev_info.max_rx_pktlen -
				port->dev_info.max_mtu;
	else
		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;

	rx_offloads = port->dev_conf.rxmode.offloads;

	/* Default config value is 0 to use PMD specific overhead */
	if (port->dev_conf.rxmode.max_rx_pkt_len == 0)
		port->dev_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MTU + eth_overhead;

	/* Frames up to the standard MTU need no jumbo support; larger ones
	 * require the JUMBO_FRAME Rx offload capability from the PMD.
	 */
	if (port->dev_conf.rxmode.max_rx_pkt_len <= RTE_ETHER_MTU + eth_overhead) {
		rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
		on = false;
	} else {
		if ((port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
			printf("Frame size (%u) is not supported by port %u\n",
				port->dev_conf.rxmode.max_rx_pkt_len,
				portid);
			return -1;
		}
		rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
		on = true;
	}

	if (rx_offloads != port->dev_conf.rxmode.offloads) {
		uint16_t qid;

		port->dev_conf.rxmode.offloads = rx_offloads;

		/* Apply JUMBO_FRAME offload configuration to Rx queue(s) */
		for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) {
			if (on)
				port->rx_conf[qid].offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
			else
				port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
		}
	}

	/* If JUMBO_FRAME is set MTU conversion done by ethdev layer,
	 * if unset do it here
	 */
	if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
		ret = rte_eth_dev_set_mtu(portid,
				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead);
		if (ret)
			printf("Failed to set MTU to %u for port %u\n",
				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead,
				portid);
	}

	return 0;
}
3445
/*
 * Initialize the device configuration of every probed port: flow
 * director settings, RSS (enabled only when multiple Rx queues are
 * configured and DCB is off), per-queue Rx/Tx parameters, MAC address,
 * and LSC/RMV interrupt flags where the device supports them.
 */
void
init_port_config(void)
{
	portid_t pid;
	struct rte_port *port;
	int ret;

	RTE_ETH_FOREACH_DEV(pid) {
		port = &ports[pid];
		port->dev_conf.fdir_conf = fdir_conf;

		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
		if (ret != 0)
			return;

		/* RSS only makes sense when more than one Rx queue exists;
		 * restrict the hash types to what the device can offload.
		 */
		if (nb_rxq > 1) {
			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				rss_hf & port->dev_info.flow_type_rss_offloads;
		} else {
			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
		}

		/* Leave the mq_mode alone when DCB has been configured. */
		if (port->dcb_flag == 0) {
			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
				port->dev_conf.rxmode.mq_mode =
					(enum rte_eth_rx_mq_mode)
						(rx_mq_mode & ETH_MQ_RX_RSS);
			else
				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
		}

		rxtx_port_config(port);

		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
		if (ret != 0)
			return;

#if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
		rte_pmd_ixgbe_bypass_init(pid);
#endif

		/* Enable link-status / removal interrupts only when both the
		 * user asked for them and the device advertises support.
		 */
		if (lsc_interrupt &&
		    (rte_eth_devices[pid].data->dev_flags &
		     RTE_ETH_DEV_INTR_LSC))
			port->dev_conf.intr_conf.lsc = 1;
		if (rmv_interrupt &&
		    (rte_eth_devices[pid].data->dev_flags &
		     RTE_ETH_DEV_INTR_RMV))
			port->dev_conf.intr_conf.rmv = 1;
	}
}
3499
3500 void set_port_slave_flag(portid_t slave_pid)
3501 {
3502         struct rte_port *port;
3503
3504         port = &ports[slave_pid];
3505         port->slave_flag = 1;
3506 }
3507
3508 void clear_port_slave_flag(portid_t slave_pid)
3509 {
3510         struct rte_port *port;
3511
3512         port = &ports[slave_pid];
3513         port->slave_flag = 0;
3514 }
3515
3516 uint8_t port_is_bonding_slave(portid_t slave_pid)
3517 {
3518         struct rte_port *port;
3519
3520         port = &ports[slave_pid];
3521         if ((rte_eth_devices[slave_pid].data->dev_flags &
3522             RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3523                 return 1;
3524         return 0;
3525 }
3526
/* VLAN IDs used to fill the VMDq+DCB pool map (get_eth_dcb_conf) and
 * the port VLAN filter table (init_port_dcb_config).
 */
const uint16_t vlan_tags[] = {
		0,  1,  2,  3,  4,  5,  6,  7,
		8,  9, 10, 11,  12, 13, 14, 15,
		16, 17, 18, 19, 20, 21, 22, 23,
		24, 25, 26, 27, 28, 29, 30, 31
};
3533
/*
 * Fill *eth_conf for DCB operation on port @pid.
 *
 * In DCB_VT_ENABLED mode a VMDq+DCB configuration is built from the
 * vlan_tags[] table; otherwise a plain DCB(+RSS) configuration is built
 * that preserves the port's current RSS hash settings.
 *
 * @num_tcs selects the number of traffic classes and @pfc_en whether
 * priority flow control capability is advertised.
 *
 * Returns 0 on success, or the error from querying the RSS hash config.
 */
static  int
get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
		 enum dcb_mode_enable dcb_mode,
		 enum rte_eth_nb_tcs num_tcs,
		 uint8_t pfc_en)
{
	uint8_t i;
	int32_t rc;
	struct rte_eth_rss_conf rss_conf;

	/*
	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
	 * given above, and the number of traffic classes available for use.
	 */
	if (dcb_mode == DCB_VT_ENABLED) {
		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;

		/* VMDQ+DCB RX and TX configurations */
		vmdq_rx_conf->enable_default_pool = 0;
		vmdq_rx_conf->default_pool = 0;
		/* 4 TCs leave room for 32 pools, 8 TCs only for 16. */
		vmdq_rx_conf->nb_queue_pools =
			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
		vmdq_tx_conf->nb_queue_pools =
			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);

		/* Spread the vlan_tags[] entries round-robin over the pools. */
		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
			vmdq_rx_conf->pool_map[i].pools =
				1 << (i % vmdq_rx_conf->nb_queue_pools);
		}
		/* Map each user priority onto a TC, wrapping at num_tcs. */
		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
		}

		/* set DCB mode of RX and TX of multiple queues */
		eth_conf->rxmode.mq_mode =
				(enum rte_eth_rx_mq_mode)
					(rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
	} else {
		struct rte_eth_dcb_rx_conf *rx_conf =
				&eth_conf->rx_adv_conf.dcb_rx_conf;
		struct rte_eth_dcb_tx_conf *tx_conf =
				&eth_conf->tx_adv_conf.dcb_tx_conf;

		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));

		/* Keep the port's current RSS hash configuration. */
		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
		if (rc != 0)
			return rc;

		rx_conf->nb_tcs = num_tcs;
		tx_conf->nb_tcs = num_tcs;

		/* Map each user priority onto a TC, wrapping at num_tcs. */
		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
			rx_conf->dcb_tc[i] = i % num_tcs;
			tx_conf->dcb_tc[i] = i % num_tcs;
		}

		eth_conf->rxmode.mq_mode =
				(enum rte_eth_rx_mq_mode)
					(rx_mq_mode & ETH_MQ_RX_DCB_RSS);
		eth_conf->rx_adv_conf.rss_conf = rss_conf;
		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
	}

	if (pfc_en)
		eth_conf->dcb_capability_en =
				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
	else
		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;

	return 0;
}
3613
3614 int
3615 init_port_dcb_config(portid_t pid,
3616                      enum dcb_mode_enable dcb_mode,
3617                      enum rte_eth_nb_tcs num_tcs,
3618                      uint8_t pfc_en)
3619 {
3620         struct rte_eth_conf port_conf;
3621         struct rte_port *rte_port;
3622         int retval;
3623         uint16_t i;
3624
3625         rte_port = &ports[pid];
3626
3627         memset(&port_conf, 0, sizeof(struct rte_eth_conf));
3628         /* Enter DCB configuration status */
3629         dcb_config = 1;
3630
3631         port_conf.rxmode = rte_port->dev_conf.rxmode;
3632         port_conf.txmode = rte_port->dev_conf.txmode;
3633
3634         /*set configuration of DCB in vt mode and DCB in non-vt mode*/
3635         retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3636         if (retval < 0)
3637                 return retval;
3638         port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3639
3640         /* re-configure the device . */
3641         retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3642         if (retval < 0)
3643                 return retval;
3644
3645         retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3646         if (retval != 0)
3647                 return retval;
3648
3649         /* If dev_info.vmdq_pool_base is greater than 0,
3650          * the queue id of vmdq pools is started after pf queues.
3651          */
3652         if (dcb_mode == DCB_VT_ENABLED &&
3653             rte_port->dev_info.vmdq_pool_base > 0) {
3654                 printf("VMDQ_DCB multi-queue mode is nonsensical"
3655                         " for port %d.", pid);
3656                 return -1;
3657         }
3658
3659         /* Assume the ports in testpmd have the same dcb capability
3660          * and has the same number of rxq and txq in dcb mode
3661          */
3662         if (dcb_mode == DCB_VT_ENABLED) {
3663                 if (rte_port->dev_info.max_vfs > 0) {
3664                         nb_rxq = rte_port->dev_info.nb_rx_queues;
3665                         nb_txq = rte_port->dev_info.nb_tx_queues;
3666                 } else {
3667                         nb_rxq = rte_port->dev_info.max_rx_queues;
3668                         nb_txq = rte_port->dev_info.max_tx_queues;
3669                 }
3670         } else {
3671                 /*if vt is disabled, use all pf queues */
3672                 if (rte_port->dev_info.vmdq_pool_base == 0) {
3673                         nb_rxq = rte_port->dev_info.max_rx_queues;
3674                         nb_txq = rte_port->dev_info.max_tx_queues;
3675                 } else {
3676                         nb_rxq = (queueid_t)num_tcs;
3677                         nb_txq = (queueid_t)num_tcs;
3678
3679                 }
3680         }
3681         rx_free_thresh = 64;
3682
3683         memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3684
3685         rxtx_port_config(rte_port);
3686         /* VLAN filter */
3687         rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3688         for (i = 0; i < RTE_DIM(vlan_tags); i++)
3689                 rx_vft_set(pid, vlan_tags[i], 1);
3690
3691         retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3692         if (retval != 0)
3693                 return retval;
3694
3695         rte_port->dcb_flag = 1;
3696
3697         return 0;
3698 }
3699
3700 static void
3701 init_port(void)
3702 {
3703         int i;
3704
3705         /* Configuration of Ethernet ports. */
3706         ports = rte_zmalloc("testpmd: ports",
3707                             sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3708                             RTE_CACHE_LINE_SIZE);
3709         if (ports == NULL) {
3710                 rte_exit(EXIT_FAILURE,
3711                                 "rte_zmalloc(%d struct rte_port) failed\n",
3712                                 RTE_MAX_ETHPORTS);
3713         }
3714         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
3715                 LIST_INIT(&ports[i].flow_tunnel_list);
3716         /* Initialize ports NUMA structures */
3717         memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3718         memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3719         memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3720 }
3721
/* Invoked from signal_handler(): run the PMD test teardown and then
 * terminate the prompt.
 */
static void
force_quit(void)
{
	pmd_test_exit();
	prompt_exit();
}
3728
3729 static void
3730 print_stats(void)
3731 {
3732         uint8_t i;
3733         const char clr[] = { 27, '[', '2', 'J', '\0' };
3734         const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3735
3736         /* Clear screen and move to top left */
3737         printf("%s%s", clr, top_left);
3738
3739         printf("\nPort statistics ====================================");
3740         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3741                 nic_stats_display(fwd_ports_ids[i]);
3742
3743         fflush(stdout);
3744 }
3745
/*
 * SIGINT/SIGTERM handler: uninitialize capture/latency frameworks, tear
 * down testpmd, then restore the default disposition and re-raise the
 * signal so the process exits with the conventional status.
 *
 * NOTE(review): printf(), rte_pdump_uninit() and the rest of the
 * teardown performed here are not async-signal-safe; a safer design
 * would only set f_quit here and do cleanup from the main loop — TODO
 * confirm before relying on this path.
 */
static void
signal_handler(int signum)
{
	if (signum == SIGINT || signum == SIGTERM) {
		printf("\nSignal %d received, preparing to exit...\n",
				signum);
#ifdef RTE_LIB_PDUMP
		/* uninitialize packet capture framework */
		rte_pdump_uninit();
#endif
#ifdef RTE_LIB_LATENCYSTATS
		if (latencystats_enabled != 0)
			rte_latencystats_uninit();
#endif
		force_quit();
		/* Set flag to indicate the force termination. */
		f_quit = 1;
		/* exit with the expected status */
		signal(signum, SIG_DFL);
		kill(getpid(), signum);
	}
}
3768
/*
 * testpmd entry point: initialize EAL and the application state, start
 * the ports, then either run the interactive prompt or forward packets
 * until the user (or a signal) stops it, and finally clean up.
 */
int
main(int argc, char** argv)
{
	int diag;
	portid_t port_id;
	uint16_t count;
	int ret;

	signal(SIGINT, signal_handler);
	signal(SIGTERM, signal_handler);

	/* Register testpmd's log type before anything tries to log. */
	testpmd_logtype = rte_log_register("testpmd");
	if (testpmd_logtype < 0)
		rte_exit(EXIT_FAILURE, "Cannot register log type");
	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);

	diag = rte_eal_init(argc, argv);
	if (diag < 0)
		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
			 rte_strerror(rte_errno));

	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
		rte_exit(EXIT_FAILURE,
			 "Secondary process type not supported.\n");

	ret = register_eth_event_callback();
	if (ret != 0)
		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");

#ifdef RTE_LIB_PDUMP
	/* initialize packet capture framework */
	rte_pdump_init();
#endif

	/* Count the probed ports and remember their ids. */
	count = 0;
	RTE_ETH_FOREACH_DEV(port_id) {
		ports_ids[count] = port_id;
		count++;
	}
	nb_ports = (portid_t) count;
	if (nb_ports == 0)
		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");

	/* allocate port structures, and init them */
	init_port();

	set_def_fwd_config();
	if (nb_lcores == 0)
		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
			 "Check the core mask argument\n");

	/* Bitrate/latency stats disabled by default */
#ifdef RTE_LIB_BITRATESTATS
	bitrate_enabled = 0;
#endif
#ifdef RTE_LIB_LATENCYSTATS
	latencystats_enabled = 0;
#endif

	/* on FreeBSD, mlockall() is disabled by default */
#ifdef RTE_EXEC_ENV_FREEBSD
	do_mlockall = 0;
#else
	do_mlockall = 1;
#endif

	/* Skip past the EAL arguments and parse testpmd's own. */
	argc -= diag;
	argv += diag;
	if (argc > 1)
		launch_args_parse(argc, argv);

	/* Pin pages in RAM; failure is logged but not fatal. */
	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
			strerror(errno));
	}

	if (tx_first && interactive)
		rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
				"interactive mode.\n");

	if (tx_first && lsc_interrupt) {
		printf("Warning: lsc_interrupt needs to be off when "
				" using tx_first. Disabling.\n");
		lsc_interrupt = 0;
	}

	if (!nb_rxq && !nb_txq)
		printf("Warning: Either rx or tx queues should be non-zero\n");

	if (nb_rxq > 1 && nb_rxq > nb_txq)
		printf("Warning: nb_rxq=%d enables RSS configuration, "
		       "but nb_txq=%d will prevent to fully test it.\n",
		       nb_rxq, nb_txq);

	init_config();

	if (hot_plug) {
		/* Hotplug: enable handling, start monitoring, and route
		 * device events to dev_event_callback().
		 */
		ret = rte_dev_hotplug_handle_enable();
		if (ret) {
			RTE_LOG(ERR, EAL,
				"fail to enable hotplug handling.");
			return -1;
		}

		ret = rte_dev_event_monitor_start();
		if (ret) {
			RTE_LOG(ERR, EAL,
				"fail to start device event monitoring.");
			return -1;
		}

		ret = rte_dev_event_callback_register(NULL,
			dev_event_callback, NULL);
		if (ret) {
			RTE_LOG(ERR, EAL,
				"fail  to register device event callback\n");
			return -1;
		}
	}

	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
		rte_exit(EXIT_FAILURE, "Start ports failed\n");

	/* set all ports to promiscuous mode by default */
	RTE_ETH_FOREACH_DEV(port_id) {
		ret = rte_eth_promiscuous_enable(port_id);
		if (ret != 0)
			printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
				port_id, rte_strerror(-ret));
	}

	/* Init metrics library */
	rte_metrics_init(rte_socket_id());

#ifdef RTE_LIB_LATENCYSTATS
	if (latencystats_enabled != 0) {
		int ret = rte_latencystats_init(1, NULL);
		if (ret)
			printf("Warning: latencystats init()"
				" returned error %d\n", ret);
		printf("Latencystats running on lcore %d\n",
			latencystats_lcore_id);
	}
#endif

	/* Setup bitrate stats */
#ifdef RTE_LIB_BITRATESTATS
	if (bitrate_enabled != 0) {
		bitrate_data = rte_stats_bitrate_create();
		if (bitrate_data == NULL)
			rte_exit(EXIT_FAILURE,
				"Could not allocate bitrate data.\n");
		rte_stats_bitrate_reg(bitrate_data);
	}
#endif

#ifdef RTE_LIB_CMDLINE
	if (strlen(cmdline_filename) != 0)
		cmdline_read_from_file(cmdline_filename);

	if (interactive == 1) {
		if (auto_start) {
			printf("Start automatic packet forwarding\n");
			start_packet_forwarding(0);
		}
		prompt();
		pmd_test_exit();
	} else
#endif
	{
		char c;
		int rc;

		f_quit = 0;

		printf("No commandline core given, start packet forwarding\n");
		start_packet_forwarding(tx_first);
		if (stats_period != 0) {
			uint64_t prev_time = 0, cur_time, diff_time = 0;
			uint64_t timer_period;

			/* Convert to number of cycles */
			timer_period = stats_period * rte_get_timer_hz();

			/* Print stats periodically until the signal handler
			 * sets f_quit.
			 */
			while (f_quit == 0) {
				cur_time = rte_get_timer_cycles();
				diff_time += cur_time - prev_time;

				if (diff_time >= timer_period) {
					print_stats();
					/* Reset the timer */
					diff_time = 0;
				}
				/* Sleep to avoid unnecessary checks */
				prev_time = cur_time;
				sleep(1);
			}
		}

		printf("Press enter to exit\n");
		rc = read(0, &c, 1);
		pmd_test_exit();
		if (rc < 0)
			return 1;
	}

	ret = rte_eal_cleanup();
	if (ret != 0)
		rte_exit(EXIT_FAILURE,
			 "EAL cleanup failed: %s\n", strerror(-ret));

	return EXIT_SUCCESS;
}