app/testpmd: fix hot-unplug detaching
[dpdk.git] / app / test-pmd / testpmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 #include <errno.h>
15 #include <stdbool.h>
16
17 #include <sys/queue.h>
18 #include <sys/stat.h>
19
20 #include <stdint.h>
21 #include <unistd.h>
22 #include <inttypes.h>
23
24 #include <rte_common.h>
25 #include <rte_errno.h>
26 #include <rte_byteorder.h>
27 #include <rte_log.h>
28 #include <rte_debug.h>
29 #include <rte_cycles.h>
30 #include <rte_memory.h>
31 #include <rte_memcpy.h>
32 #include <rte_launch.h>
33 #include <rte_eal.h>
34 #include <rte_alarm.h>
35 #include <rte_per_lcore.h>
36 #include <rte_lcore.h>
37 #include <rte_atomic.h>
38 #include <rte_branch_prediction.h>
39 #include <rte_mempool.h>
40 #include <rte_malloc.h>
41 #include <rte_mbuf.h>
42 #include <rte_mbuf_pool_ops.h>
43 #include <rte_interrupts.h>
44 #include <rte_pci.h>
45 #include <rte_ether.h>
46 #include <rte_ethdev.h>
47 #include <rte_dev.h>
48 #include <rte_string_fns.h>
49 #ifdef RTE_LIBRTE_IXGBE_PMD
50 #include <rte_pmd_ixgbe.h>
51 #endif
52 #ifdef RTE_LIBRTE_PDUMP
53 #include <rte_pdump.h>
54 #endif
55 #include <rte_flow.h>
56 #include <rte_metrics.h>
57 #ifdef RTE_LIBRTE_BITRATE
58 #include <rte_bitrate.h>
59 #endif
60 #ifdef RTE_LIBRTE_LATENCY_STATS
61 #include <rte_latencystats.h>
62 #endif
63
64 #include "testpmd.h"
65
66 #ifndef MAP_HUGETLB
67 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
68 #define HUGE_FLAG (0x40000)
69 #else
70 #define HUGE_FLAG MAP_HUGETLB
71 #endif
72
73 #ifndef MAP_HUGE_SHIFT
74 /* older kernels (or FreeBSD) will not have this define */
75 #define HUGE_SHIFT (26)
76 #else
77 #define HUGE_SHIFT MAP_HUGE_SHIFT
78 #endif
79
80 #define EXTMEM_HEAP_NAME "extmem"
81 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
82
83 uint16_t verbose_level = 0; /**< Silent by default. */
84 int testpmd_logtype; /**< Log type for testpmd logs */
85
86 /* Use the master core for the command line? */
87 uint8_t interactive = 0;
88 uint8_t auto_start = 0;
89 uint8_t tx_first;
90 char cmdline_filename[PATH_MAX] = {0};
91
92 /*
93  * NUMA support configuration.
94  * When set, the NUMA support attempts to dispatch the allocation of the
95  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
96  * probed ports among the CPU sockets 0 and 1.
97  * Otherwise, all memory is allocated from CPU socket 0.
98  */
99 uint8_t numa_support = 1; /**< numa enabled by default */
100
101 /*
102  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
103  * not configured.
104  */
105 uint8_t socket_num = UMA_NO_CONFIG;
106
107 /*
108  * Select mempool allocation type:
109  * - native: use regular DPDK memory
110  * - anon: use regular DPDK memory to create mempool, but populate using
111  *         anonymous memory (may not be IOVA-contiguous)
112  * - xmem: use externally allocated hugepage memory
113  */
114 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
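
/*
 * Illustrative example (assuming the --mp-alloc command-line option of this
 * testpmd build): the allocation type above is normally chosen at start-up,
 * e.g.
 *
 *     ./testpmd -l 0-3 -n 4 -- --mp-alloc=xmemhuge -i
 *
 * which selects MP_ALLOC_XMEM_HUGE and is honoured by mbuf_pool_create().
 */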
115
116 /*
117  * Store the specified sockets on which the memory pools used by the ports
118  * are allocated.
119  */
120 uint8_t port_numa[RTE_MAX_ETHPORTS];
121
122 /*
123  * Store the specified sockets on which the RX rings used by the ports
124  * are allocated.
125  */
126 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
127
128 /*
129  * Store the specified sockets on which the TX rings used by the ports
130  * are allocated.
131  */
132 uint8_t txring_numa[RTE_MAX_ETHPORTS];
133
134 /*
135  * Record the Ethernet address of peer target ports to which packets are
136  * forwarded.
137  * Must be instantiated with the Ethernet addresses of peer traffic generator
138  * ports.
139  */
140 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
141 portid_t nb_peer_eth_addrs = 0;
142
143 /*
144  * Probed Target Environment.
145  */
146 struct rte_port *ports;        /**< For all probed ethernet ports. */
147 portid_t nb_ports;             /**< Number of probed ethernet ports. */
148 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
149 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
150
151 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
152
153 /*
154  * Test Forwarding Configuration.
155  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
156  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
157  */
158 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
159 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
160 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
161 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
162
163 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
164 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
165
166 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
167 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
168
169 /*
170  * Forwarding engines.
171  */
172 struct fwd_engine * fwd_engines[] = {
173         &io_fwd_engine,
174         &mac_fwd_engine,
175         &mac_swap_engine,
176         &flow_gen_engine,
177         &rx_only_engine,
178         &tx_only_engine,
179         &csum_fwd_engine,
180         &icmp_echo_engine,
181         &noisy_vnf_engine,
182 #if defined RTE_LIBRTE_PMD_SOFTNIC
183         &softnic_fwd_engine,
184 #endif
185 #ifdef RTE_LIBRTE_IEEE1588
186         &ieee1588_fwd_engine,
187 #endif
188         NULL,
189 };
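
/*
 * Illustrative usage: cur_fwd_eng (below) points at the active entry of this
 * table. In interactive mode the engine is switched with the "set fwd"
 * command, e.g.
 *
 *     testpmd> set fwd csum
 *
 * selects csum_fwd_engine for the next "start".
 */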
190
191 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES];
192 uint16_t mempool_flags;
193
194 struct fwd_config cur_fwd_config;
195 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
196 uint32_t retry_enabled;
197 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
198 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
199
200 uint16_t mbuf_data_size = DEFAULT_MBUF_DATA_SIZE; /**< Mbuf data space size. */
201 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
202                                       * specified on command-line. */
203 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
204
205 /*
206  * In a container, a process running with the 'stats-period' option cannot
207  * be terminated. Set a flag to exit the stats-period loop on SIGINT/SIGTERM.
208  */
209 uint8_t f_quit;
210
211 /*
212  * Configuration of packet segments used by the "txonly" processing engine.
213  */
214 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
215 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
216         TXONLY_DEF_PACKET_LEN,
217 };
218 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
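
/*
 * Illustrative example (interactive "set txpkts" command): a two-segment,
 * 128-byte TXONLY packet is configured with
 *
 *     testpmd> set txpkts 64,64
 *
 * which yields tx_pkt_seg_lengths[] = {64, 64}, tx_pkt_nb_segs = 2 and
 * tx_pkt_length = 128.
 */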
219
220 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
221 /**< Split policy for packets to TX. */
222
223 uint8_t txonly_multi_flow;
224 /**< Whether multiple flows are generated in TXONLY mode. */
225
226 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
227 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
228
229 /* Whether the current configuration is in DCB mode; 0 means not in DCB mode */
230 uint8_t dcb_config = 0;
231
232 /* Whether DCB is in testing status */
233 uint8_t dcb_test = 0;
234
235 /*
236  * Configurable number of RX/TX queues.
237  */
238 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
239 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
240 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
241
242 /*
243  * Configurable number of RX/TX ring descriptors.
244  * Defaults are supplied by drivers via ethdev.
245  */
246 #define RTE_TEST_RX_DESC_DEFAULT 0
247 #define RTE_TEST_TX_DESC_DEFAULT 0
248 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
249 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
250
251 #define RTE_PMD_PARAM_UNSET -1
252 /*
253  * Configurable values of RX and TX ring threshold registers.
254  */
255
256 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
257 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
258 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
259
260 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
261 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
262 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
263
264 /*
265  * Configurable value of RX free threshold.
266  */
267 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
268
269 /*
270  * Configurable value of RX drop enable.
271  */
272 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
273
274 /*
275  * Configurable value of TX free threshold.
276  */
277 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
278
279 /*
280  * Configurable value of TX RS bit threshold.
281  */
282 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
283
284 /*
285  * Configurable value of buffered packets before sending.
286  */
287 uint16_t noisy_tx_sw_bufsz;
288
289 /*
290  * Configurable value of packet buffer timeout.
291  */
292 uint16_t noisy_tx_sw_buf_flush_time;
293
294 /*
295  * Configurable value for size of VNF internal memory area
296  * used for simulating noisy neighbour behaviour
297  */
298 uint64_t noisy_lkup_mem_sz;
299
300 /*
301  * Configurable value of number of random writes done in
302  * VNF simulation memory area.
303  */
304 uint64_t noisy_lkup_num_writes;
305
306 /*
307  * Configurable value of number of random reads done in
308  * VNF simulation memory area.
309  */
310 uint64_t noisy_lkup_num_reads;
311
312 /*
313  * Configurable value of number of random reads/writes done in
314  * VNF simulation memory area.
315  */
316 uint64_t noisy_lkup_num_reads_writes;
317
318 /*
319  * Receive Side Scaling (RSS) configuration.
320  */
321 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
322
323 /*
324  * Port topology configuration
325  */
326 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
327
328 /*
329  * Avoid flushing all the RX streams before starting forwarding.
330  */
331 uint8_t no_flush_rx = 0; /* flush by default */
332
333 /*
334  * Flow API isolated mode.
335  */
336 uint8_t flow_isolate_all;
337
338 /*
339  * Avoid checking the link status when starting/stopping a port.
340  */
341 uint8_t no_link_check = 0; /* check by default */
342
343 /*
344  * Don't automatically start all ports in interactive mode.
345  */
346 uint8_t no_device_start = 0;
347
348 /*
349  * Enable link status change notification
350  */
351 uint8_t lsc_interrupt = 1; /* enabled by default */
352
353 /*
354  * Enable device removal notification.
355  */
356 uint8_t rmv_interrupt = 1; /* enabled by default */
357
358 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
359
360 /* After attach, port setup is called on event or by iterator */
361 bool setup_on_probe_event = true;
362
363 /* Clear ptypes on port initialization. */
364 uint8_t clear_ptypes = true;
365
366 /* Pretty printing of ethdev events */
367 static const char * const eth_event_desc[] = {
368         [RTE_ETH_EVENT_UNKNOWN] = "unknown",
369         [RTE_ETH_EVENT_INTR_LSC] = "link state change",
370         [RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
371         [RTE_ETH_EVENT_INTR_RESET] = "reset",
372         [RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
373         [RTE_ETH_EVENT_IPSEC] = "IPsec",
374         [RTE_ETH_EVENT_MACSEC] = "MACsec",
375         [RTE_ETH_EVENT_INTR_RMV] = "device removal",
376         [RTE_ETH_EVENT_NEW] = "device probed",
377         [RTE_ETH_EVENT_DESTROY] = "device released",
378         [RTE_ETH_EVENT_MAX] = NULL,
379 };
380
381 /*
382  * Display or mask ethdev events
383  * Default to all events except VF_MBOX
384  */
385 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
386                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
387                             (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
388                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
389                             (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
390                             (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
391                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV);
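
/*
 * Illustrative sketch: individual event types are tested against or added to
 * the mask with plain bit operations, e.g.
 *
 *     if (event_print_mask & (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV))
 *             ... removal events are printed ...
 *     event_print_mask |= UINT32_C(1) << RTE_ETH_EVENT_VF_MBOX;
 */
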
392 /*
393  * Decide if all memory is locked for performance.
394  */
395 int do_mlockall = 0;
396
397 /*
398  * NIC bypass mode configuration options.
399  */
400
401 #if defined RTE_LIBRTE_IXGBE_PMD && defined RTE_LIBRTE_IXGBE_BYPASS
402 /* The NIC bypass watchdog timeout. */
403 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
404 #endif
405
406
407 #ifdef RTE_LIBRTE_LATENCY_STATS
408
409 /*
410  * Set when latency stats are enabled on the command line.
411  */
412 uint8_t latencystats_enabled;
413
414 /*
415  * Lcore ID to serve latency statistics.
416  */
417 lcoreid_t latencystats_lcore_id = -1;
418
419 #endif
420
421 /*
422  * Ethernet device configuration.
423  */
424 struct rte_eth_rxmode rx_mode = {
425         .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
426                 /**< Default maximum frame length. */
427 };
428
429 struct rte_eth_txmode tx_mode = {
430         .offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
431 };
432
433 struct rte_fdir_conf fdir_conf = {
434         .mode = RTE_FDIR_MODE_NONE,
435         .pballoc = RTE_FDIR_PBALLOC_64K,
436         .status = RTE_FDIR_REPORT_STATUS,
437         .mask = {
438                 .vlan_tci_mask = 0xFFEF,
439                 .ipv4_mask     = {
440                         .src_ip = 0xFFFFFFFF,
441                         .dst_ip = 0xFFFFFFFF,
442                 },
443                 .ipv6_mask     = {
444                         .src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
445                         .dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
446                 },
447                 .src_port_mask = 0xFFFF,
448                 .dst_port_mask = 0xFFFF,
449                 .mac_addr_byte_mask = 0xFF,
450                 .tunnel_type_mask = 1,
451                 .tunnel_id_mask = 0xFFFFFFFF,
452         },
453         .drop_queue = 127,
454 };
455
456 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
457
458 struct queue_stats_mappings tx_queue_stats_mappings_array[MAX_TX_QUEUE_STATS_MAPPINGS];
459 struct queue_stats_mappings rx_queue_stats_mappings_array[MAX_RX_QUEUE_STATS_MAPPINGS];
460
461 struct queue_stats_mappings *tx_queue_stats_mappings = tx_queue_stats_mappings_array;
462 struct queue_stats_mappings *rx_queue_stats_mappings = rx_queue_stats_mappings_array;
463
464 uint16_t nb_tx_queue_stats_mappings = 0;
465 uint16_t nb_rx_queue_stats_mappings = 0;
466
467 /*
468  * Display zero values by default for xstats
469  */
470 uint8_t xstats_hide_zero;
471
472 unsigned int num_sockets = 0;
473 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
474
475 #ifdef RTE_LIBRTE_BITRATE
476 /* Bitrate statistics */
477 struct rte_stats_bitrates *bitrate_data;
478 lcoreid_t bitrate_lcore_id;
479 uint8_t bitrate_enabled;
480 #endif
481
482 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
483 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
484
485 /* Forward function declarations */
486 static void setup_attached_port(portid_t pi);
487 static void map_port_queue_stats_mapping_registers(portid_t pi,
488                                                    struct rte_port *port);
489 static void check_all_ports_link_status(uint32_t port_mask);
490 static int eth_event_callback(portid_t port_id,
491                               enum rte_eth_event_type type,
492                               void *param, void *ret_param);
493 static void dev_event_callback(const char *device_name,
494                                 enum rte_dev_event_type type,
495                                 void *param);
496
497 /*
498  * Check if all the ports are started.
499  * If yes, return positive value. If not, return zero.
500  */
501 static int all_ports_started(void);
502
503 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
504 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
505
506 /* Holds the registered mbuf dynamic flags names. */
507 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
508
509 /*
510  * Helper function to check whether a socket ID is new (not yet discovered).
511  * If new, return a positive value; otherwise return zero.
512  */
513 int
514 new_socket_id(unsigned int socket_id)
515 {
516         unsigned int i;
517
518         for (i = 0; i < num_sockets; i++) {
519                 if (socket_ids[i] == socket_id)
520                         return 0;
521         }
522         return 1;
523 }
524
525 /*
526  * Setup default configuration.
527  */
528 static void
529 set_default_fwd_lcores_config(void)
530 {
531         unsigned int i;
532         unsigned int nb_lc;
533         unsigned int sock_num;
534
535         nb_lc = 0;
536         for (i = 0; i < RTE_MAX_LCORE; i++) {
537                 if (!rte_lcore_is_enabled(i))
538                         continue;
539                 sock_num = rte_lcore_to_socket_id(i);
540                 if (new_socket_id(sock_num)) {
541                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
542                                 rte_exit(EXIT_FAILURE,
543                                          "Total sockets greater than %u\n",
544                                          RTE_MAX_NUMA_NODES);
545                         }
546                         socket_ids[num_sockets++] = sock_num;
547                 }
548                 if (i == rte_get_master_lcore())
549                         continue;
550                 fwd_lcores_cpuids[nb_lc++] = i;
551         }
552         nb_lcores = (lcoreid_t) nb_lc;
553         nb_cfg_lcores = nb_lcores;
554         nb_fwd_lcores = 1;
555 }
556
557 static void
558 set_def_peer_eth_addrs(void)
559 {
560         portid_t i;
561
562         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
563                 peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
564                 peer_eth_addrs[i].addr_bytes[5] = i;
565         }
566 }
567
568 static void
569 set_default_fwd_ports_config(void)
570 {
571         portid_t pt_id;
572         int i = 0;
573
574         RTE_ETH_FOREACH_DEV(pt_id) {
575                 fwd_ports_ids[i++] = pt_id;
576
577                 /* Update sockets info according to the attached device */
578                 int socket_id = rte_eth_dev_socket_id(pt_id);
579                 if (socket_id >= 0 && new_socket_id(socket_id)) {
580                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
581                                 rte_exit(EXIT_FAILURE,
582                                          "Total sockets greater than %u\n",
583                                          RTE_MAX_NUMA_NODES);
584                         }
585                         socket_ids[num_sockets++] = socket_id;
586                 }
587         }
588
589         nb_cfg_ports = nb_ports;
590         nb_fwd_ports = nb_ports;
591 }
592
593 void
594 set_def_fwd_config(void)
595 {
596         set_default_fwd_lcores_config();
597         set_def_peer_eth_addrs();
598         set_default_fwd_ports_config();
599 }
600
601 /* extremely pessimistic estimation of memory required to create a mempool */
602 static int
603 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
604 {
605         unsigned int n_pages, mbuf_per_pg, leftover;
606         uint64_t total_mem, mbuf_mem, obj_sz;
607
608         /* there is no good way to predict how much space the mempool will
609          * occupy because it will allocate chunks on the fly, and some of those
610          * will come from default DPDK memory while some will come from our
611          * external memory, so just assume 128MB will be enough for everyone.
612          */
613         uint64_t hdr_mem = 128 << 20;
614
615         /* account for possible non-contiguousness */
616         obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
617         if (obj_sz > pgsz) {
618                 TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
619                 return -1;
620         }
621
622         mbuf_per_pg = pgsz / obj_sz;
623         leftover = (nb_mbufs % mbuf_per_pg) > 0;
624         n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
625
626         mbuf_mem = n_pages * pgsz;
627
628         total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
629
630         if (total_mem > SIZE_MAX) {
631                 TESTPMD_LOG(ERR, "Memory size too big\n");
632                 return -1;
633         }
634         *out = (size_t)total_mem;
635
636         return 0;
637 }
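
/*
 * Worked example (hypothetical numbers): with obj_sz = 2048 B, pgsz = 2 MiB
 * and nb_mbufs = 10000: mbuf_per_pg = 1024, n_pages = 9 + 1 = 10, so
 * mbuf_mem = 20 MiB and *out = RTE_ALIGN(128 MiB + 20 MiB, 2 MiB) = 148 MiB.
 */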
638
639 static int
640 pagesz_flags(uint64_t page_sz)
641 {
642         /* as per mmap() manpage, all page sizes are log2 of page size
643          * shifted by MAP_HUGE_SHIFT
644          */
645         int log2 = rte_log2_u64(page_sz);
646
647         return (log2 << HUGE_SHIFT);
648 }
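
/*
 * Example (Linux encoding): a 2 MiB page gives rte_log2_u64(2 MiB) = 21, so
 * the returned value is (21 << MAP_HUGE_SHIFT), i.e. the kernel's
 * MAP_HUGE_2MB flag.
 */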
649
650 static void *
651 alloc_mem(size_t memsz, size_t pgsz, bool huge)
652 {
653         void *addr;
654         int flags;
655
656         /* allocate anonymous hugepages */
657         flags = MAP_ANONYMOUS | MAP_PRIVATE;
658         if (huge)
659                 flags |= HUGE_FLAG | pagesz_flags(pgsz);
660
661         addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
662         if (addr == MAP_FAILED)
663                 return NULL;
664
665         return addr;
666 }
667
668 struct extmem_param {
669         void *addr;
670         size_t len;
671         size_t pgsz;
672         rte_iova_t *iova_table;
673         unsigned int iova_table_len;
674 };
675
676 static int
677 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
678                 bool huge)
679 {
680         uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
681                         RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
682         unsigned int cur_page, n_pages, pgsz_idx;
683         size_t mem_sz, cur_pgsz;
684         rte_iova_t *iovas = NULL;
685         void *addr;
686         int ret;
687
688         for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
689                 /* skip anything that is too big */
690                 if (pgsizes[pgsz_idx] > SIZE_MAX)
691                         continue;
692
693                 cur_pgsz = pgsizes[pgsz_idx];
694
695                 /* if we were told not to allocate hugepages, override */
696                 if (!huge)
697                         cur_pgsz = sysconf(_SC_PAGESIZE);
698
699                 ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
700                 if (ret < 0) {
701                         TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
702                         return -1;
703                 }
704
705                 /* allocate our memory */
706                 addr = alloc_mem(mem_sz, cur_pgsz, huge);
707
708                 /* if we couldn't allocate memory with a specified page size,
709                  * that doesn't mean we can't do it with other page sizes, so
710                  * try another one.
711                  */
712                 if (addr == NULL)
713                         continue;
714
715                 /* store IOVA addresses for every page in this memory area */
716                 n_pages = mem_sz / cur_pgsz;
717
718                 iovas = malloc(sizeof(*iovas) * n_pages);
719
720                 if (iovas == NULL) {
721                         TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
722                         goto fail;
723                 }
724                 /* lock memory if it's not huge pages */
725                 if (!huge)
726                         mlock(addr, mem_sz);
727
728                 /* populate IOVA addresses */
729                 for (cur_page = 0; cur_page < n_pages; cur_page++) {
730                         rte_iova_t iova;
731                         size_t offset;
732                         void *cur;
733
734                         offset = cur_pgsz * cur_page;
735                         cur = RTE_PTR_ADD(addr, offset);
736
737                         /* touch the page before getting its IOVA */
738                         *(volatile char *)cur = 0;
739
740                         iova = rte_mem_virt2iova(cur);
741
742                         iovas[cur_page] = iova;
743                 }
744
745                 break;
746         }
747         /* if we couldn't allocate anything */
748         if (iovas == NULL)
749                 return -1;
750
751         param->addr = addr;
752         param->len = mem_sz;
753         param->pgsz = cur_pgsz;
754         param->iova_table = iovas;
755         param->iova_table_len = n_pages;
756
757         return 0;
758 fail:
759         if (iovas)
760                 free(iovas);
761         if (addr)
762                 munmap(addr, mem_sz);
763
764         return -1;
765 }
766
767 static int
768 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
769 {
770         struct extmem_param param;
771         int socket_id, ret;
772
773         memset(&param, 0, sizeof(param));
774
775         /* check if our heap exists */
776         socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
777         if (socket_id < 0) {
778                 /* create our heap */
779                 ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
780                 if (ret < 0) {
781                         TESTPMD_LOG(ERR, "Cannot create heap\n");
782                         return -1;
783                 }
784         }
785
786         ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
787         if (ret < 0) {
788                 TESTPMD_LOG(ERR, "Cannot create memory area\n");
789                 return -1;
790         }
791
792         /* we now have a valid memory area, so add it to heap */
793         ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
794                         param.addr, param.len, param.iova_table,
795                         param.iova_table_len, param.pgsz);
796
797         /* when using VFIO, memory is automatically mapped for DMA by EAL */
798
799         /* not needed any more */
800         free(param.iova_table);
801
802         if (ret < 0) {
803                 TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
804                 munmap(param.addr, param.len);
805                 return -1;
806         }
807
808         /* success */
809
810         TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
811                         param.len >> 20);
812
813         return 0;
814 }
815 static void
816 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
817              struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
818 {
819         uint16_t pid = 0;
820         int ret;
821
822         RTE_ETH_FOREACH_DEV(pid) {
823                 struct rte_eth_dev *dev =
824                         &rte_eth_devices[pid];
825
826                 ret = rte_dev_dma_unmap(dev->device, memhdr->addr, 0,
827                                         memhdr->len);
828                 if (ret) {
829                         TESTPMD_LOG(DEBUG,
830                                     "unable to DMA unmap addr 0x%p "
831                                     "for device %s\n",
832                                     memhdr->addr, dev->data->name);
833                 }
834         }
835         ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
836         if (ret) {
837                 TESTPMD_LOG(DEBUG,
838                             "unable to un-register addr 0x%p\n", memhdr->addr);
839         }
840 }
841
842 static void
843 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
844            struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
845 {
846         uint16_t pid = 0;
847         size_t page_size = sysconf(_SC_PAGESIZE);
848         int ret;
849
850         ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
851                                   page_size);
852         if (ret) {
853                 TESTPMD_LOG(DEBUG,
854                             "unable to register addr 0x%p\n", memhdr->addr);
855                 return;
856         }
857         RTE_ETH_FOREACH_DEV(pid) {
858                 struct rte_eth_dev *dev =
859                         &rte_eth_devices[pid];
860
861                 ret = rte_dev_dma_map(dev->device, memhdr->addr, 0,
862                                       memhdr->len);
863                 if (ret) {
864                         TESTPMD_LOG(DEBUG,
865                                     "unable to DMA map addr 0x%p "
866                                     "for device %s\n",
867                                     memhdr->addr, dev->data->name);
868                 }
869         }
870 }
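
/*
 * Note: these callbacks are not invoked directly; dma_map_cb is walked over
 * every memory chunk of an anonymous mempool via
 *
 *     rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
 *
 * in mbuf_pool_create() below, and dma_unmap_cb is presumably applied the
 * same way when such a pool is torn down.
 */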
871
872 static unsigned int
873 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
874             char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
875 {
876         struct rte_pktmbuf_extmem *xmem;
877         unsigned int ext_num, zone_num, elt_num;
878         uint16_t elt_size;
879
880         elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
881         elt_num = EXTBUF_ZONE_SIZE / elt_size;
882         zone_num = (nb_mbufs + elt_num - 1) / elt_num;
883
884         xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
885         if (xmem == NULL) {
886                 TESTPMD_LOG(ERR, "Cannot allocate memory for "
887                                  "external buffer descriptors\n");
888                 *ext_mem = NULL;
889                 return 0;
890         }
891         for (ext_num = 0; ext_num < zone_num; ext_num++) {
892                 struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
893                 const struct rte_memzone *mz;
894                 char mz_name[RTE_MEMZONE_NAMESIZE];
895                 int ret;
896
897                 ret = snprintf(mz_name, sizeof(mz_name),
898                         RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
899                 if (ret < 0 || ret >= (int)sizeof(mz_name)) {
900                         errno = ENAMETOOLONG;
901                         ext_num = 0;
902                         break;
903                 }
904                 mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
905                                                  socket_id,
906                                                  RTE_MEMZONE_IOVA_CONTIG |
907                                                  RTE_MEMZONE_1GB |
908                                                  RTE_MEMZONE_SIZE_HINT_ONLY,
909                                                  EXTBUF_ZONE_SIZE);
910                 if (mz == NULL) {
911                         /*
912                          * The caller exits on external buffer creation
913                          * error, so there is no need to free memzones.
914                          */
915                         errno = ENOMEM;
916                         ext_num = 0;
917                         break;
918                 }
919                 xseg->buf_ptr = mz->addr;
920                 xseg->buf_iova = mz->iova;
921                 xseg->buf_len = EXTBUF_ZONE_SIZE;
922                 xseg->elt_size = elt_size;
923         }
924         if (ext_num == 0 && xmem != NULL) {
925                 free(xmem);
926                 xmem = NULL;
927         }
928         *ext_mem = xmem;
929         return ext_num;
930 }
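
/*
 * Worked example (hypothetical sizes): mbuf_sz = 2176 B is already a multiple
 * of RTE_CACHE_LINE_SIZE, so elt_size = 2176 and one 2 MiB zone holds
 * elt_num = 2097152 / 2176 = 963 buffers; nb_mbufs = 10000 then needs
 * zone_num = (10000 + 962) / 963 = 11 memzones.
 */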
931
932 /*
933  * Configuration initialisation done once at init time.
934  */
935 static struct rte_mempool *
936 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
937                  unsigned int socket_id)
938 {
939         char pool_name[RTE_MEMPOOL_NAMESIZE];
940         struct rte_mempool *rte_mp = NULL;
941         uint32_t mb_size;
942
943         mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
944         mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name));
945
946         TESTPMD_LOG(INFO,
947                 "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
948                 pool_name, nb_mbuf, mbuf_seg_size, socket_id);
949
950         switch (mp_alloc_type) {
951         case MP_ALLOC_NATIVE:
952                 {
953                         /* wrapper to rte_mempool_create() */
954                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
955                                         rte_mbuf_best_mempool_ops());
956                         rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
957                                 mb_mempool_cache, 0, mbuf_seg_size, socket_id);
958                         break;
959                 }
960         case MP_ALLOC_ANON:
961                 {
962                         rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
963                                 mb_size, (unsigned int) mb_mempool_cache,
964                                 sizeof(struct rte_pktmbuf_pool_private),
965                                 socket_id, mempool_flags);
966                         if (rte_mp == NULL)
967                                 goto err;
968
969                         if (rte_mempool_populate_anon(rte_mp) == 0) {
970                                 rte_mempool_free(rte_mp);
971                                 rte_mp = NULL;
972                                 goto err;
973                         }
974                         rte_pktmbuf_pool_init(rte_mp, NULL);
975                         rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
976                         rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
977                         break;
978                 }
979         case MP_ALLOC_XMEM:
980         case MP_ALLOC_XMEM_HUGE:
981                 {
982                         int heap_socket;
983                         bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
984
985                         if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
986                                 rte_exit(EXIT_FAILURE, "Could not create external memory\n");
987
988                         heap_socket =
989                                 rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
990                         if (heap_socket < 0)
991                                 rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
992
993                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
994                                         rte_mbuf_best_mempool_ops());
995                         rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
996                                         mb_mempool_cache, 0, mbuf_seg_size,
997                                         heap_socket);
998                         break;
999                 }
1000         case MP_ALLOC_XBUF:
1001                 {
1002                         struct rte_pktmbuf_extmem *ext_mem;
1003                         unsigned int ext_num;
1004
1005                         ext_num = setup_extbuf(nb_mbuf, mbuf_seg_size,
1006                                                socket_id, pool_name, &ext_mem);
1007                         if (ext_num == 0)
1008                                 rte_exit(EXIT_FAILURE,
1009                                          "Can't create pinned data buffers\n");
1010
1011                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1012                                         rte_mbuf_best_mempool_ops());
1013                         rte_mp = rte_pktmbuf_pool_create_extbuf
1014                                         (pool_name, nb_mbuf, mb_mempool_cache,
1015                                          0, mbuf_seg_size, socket_id,
1016                                          ext_mem, ext_num);
1017                         free(ext_mem);
1018                         break;
1019                 }
1020         default:
1021                 {
1022                         rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1023                 }
1024         }
1025
1026 err:
1027         if (rte_mp == NULL) {
1028                 rte_exit(EXIT_FAILURE,
1029                         "Creation of mbuf pool for socket %u failed: %s\n",
1030                         socket_id, rte_strerror(rte_errno));
1031         } else if (verbose_level > 0) {
1032                 rte_mempool_dump(stdout, rte_mp);
1033         }
1034         return rte_mp;
1035 }
1036
1037 /*
1038  * Check whether the given socket ID is valid in NUMA mode;
1039  * return 0 if valid, -1 otherwise.
1040  */
1041 static int
1042 check_socket_id(const unsigned int socket_id)
1043 {
1044         static int warning_once = 0;
1045
1046         if (new_socket_id(socket_id)) {
1047                 if (!warning_once && numa_support)
1048                         printf("Warning: NUMA should be configured manually by"
1049                                " using --port-numa-config and"
1050                                " --ring-numa-config parameters along with"
1051                                " --numa.\n");
1052                 warning_once = 1;
1053                 return -1;
1054         }
1055         return 0;
1056 }
1057
1058 /*
1059  * Get the allowed maximum number of RX queues.
1060  * *pid returns the port ID that has the minimal value of
1061  * max_rx_queues among all ports.
1062  */
1063 queueid_t
1064 get_allowed_max_nb_rxq(portid_t *pid)
1065 {
1066         queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1067         bool max_rxq_valid = false;
1068         portid_t pi;
1069         struct rte_eth_dev_info dev_info;
1070
1071         RTE_ETH_FOREACH_DEV(pi) {
1072                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1073                         continue;
1074
1075                 max_rxq_valid = true;
1076                 if (dev_info.max_rx_queues < allowed_max_rxq) {
1077                         allowed_max_rxq = dev_info.max_rx_queues;
1078                         *pid = pi;
1079                 }
1080         }
1081         return max_rxq_valid ? allowed_max_rxq : 0;
1082 }
1083
1084 /*
1085  * Check whether the input rxq is valid.
1086  * It is valid if it does not exceed the maximum number of
1087  * RX queues of any port;
1088  * return 0 if valid, -1 otherwise.
1089  */
1090 int
1091 check_nb_rxq(queueid_t rxq)
1092 {
1093         queueid_t allowed_max_rxq;
1094         portid_t pid = 0;
1095
1096         allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1097         if (rxq > allowed_max_rxq) {
1098                 printf("Fail: input rxq (%u) can't be greater "
1099                        "than max_rx_queues (%u) of port %u\n",
1100                        rxq,
1101                        allowed_max_rxq,
1102                        pid);
1103                 return -1;
1104         }
1105         return 0;
1106 }
1107
1108 /*
1109  * Get the allowed maximum number of TX queues.
1110  * *pid returns the port ID that has the minimal value of
1111  * max_tx_queues among all ports.
1112  */
1113 queueid_t
1114 get_allowed_max_nb_txq(portid_t *pid)
1115 {
1116         queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1117         bool max_txq_valid = false;
1118         portid_t pi;
1119         struct rte_eth_dev_info dev_info;
1120
1121         RTE_ETH_FOREACH_DEV(pi) {
1122                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1123                         continue;
1124
1125                 max_txq_valid = true;
1126                 if (dev_info.max_tx_queues < allowed_max_txq) {
1127                         allowed_max_txq = dev_info.max_tx_queues;
1128                         *pid = pi;
1129                 }
1130         }
1131         return max_txq_valid ? allowed_max_txq : 0;
1132 }
1133
1134 /*
1135  * Check whether the input txq is valid.
1136  * It is valid if it does not exceed the maximum number of
1137  * TX queues of any port;
1138  * return 0 if valid, -1 otherwise.
1139  */
1140 int
1141 check_nb_txq(queueid_t txq)
1142 {
1143         queueid_t allowed_max_txq;
1144         portid_t pid = 0;
1145
1146         allowed_max_txq = get_allowed_max_nb_txq(&pid);
1147         if (txq > allowed_max_txq) {
1148                 printf("Fail: input txq (%u) can't be greater "
1149                        "than max_tx_queues (%u) of port %u\n",
1150                        txq,
1151                        allowed_max_txq,
1152                        pid);
1153                 return -1;
1154         }
1155         return 0;
1156 }
1157
1158 /*
1159  * Get the allowed maximum number of hairpin queues.
1160  * *pid returns the port ID that has the minimal value of
1161  * max_hairpin_queues among all ports.
1162  */
1163 queueid_t
1164 get_allowed_max_nb_hairpinq(portid_t *pid)
1165 {
1166         queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1167         portid_t pi;
1168         struct rte_eth_hairpin_cap cap;
1169
1170         RTE_ETH_FOREACH_DEV(pi) {
1171                 if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1172                         *pid = pi;
1173                         return 0;
1174                 }
1175                 if (cap.max_nb_queues < allowed_max_hairpinq) {
1176                         allowed_max_hairpinq = cap.max_nb_queues;
1177                         *pid = pi;
1178                 }
1179         }
1180         return allowed_max_hairpinq;
1181 }
1182
1183 /*
1184  * Check whether the input hairpinq is valid.
1185  * It is valid if it does not exceed the maximum number of
1186  * hairpin queues of any port;
1187  * return 0 if valid, -1 otherwise.
1188  */
1189 int
1190 check_nb_hairpinq(queueid_t hairpinq)
1191 {
1192         queueid_t allowed_max_hairpinq;
1193         portid_t pid = 0;
1194
1195         allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1196         if (hairpinq > allowed_max_hairpinq) {
1197                 printf("Fail: input hairpin (%u) can't be greater "
1198                        "than max_hairpin_queues (%u) of port %u\n",
1199                        hairpinq, allowed_max_hairpinq, pid);
1200                 return -1;
1201         }
1202         return 0;
1203 }
1204
1205 static void
1206 init_config(void)
1207 {
1208         portid_t pid;
1209         struct rte_port *port;
1210         struct rte_mempool *mbp;
1211         unsigned int nb_mbuf_per_pool;
1212         lcoreid_t  lc_id;
1213         uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
1214         struct rte_gro_param gro_param;
1215         uint32_t gso_types;
1216         uint16_t data_size;
1217         bool warning = false;
1218         int k;
1219         int ret;
1220
1221         memset(port_per_socket, 0, RTE_MAX_NUMA_NODES);
1222
1223         /* Configuration of logical cores. */
1224         fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1225                                 sizeof(struct fwd_lcore *) * nb_lcores,
1226                                 RTE_CACHE_LINE_SIZE);
1227         if (fwd_lcores == NULL) {
1228                 rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1229                                                         "failed\n", nb_lcores);
1230         }
1231         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1232                 fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1233                                                sizeof(struct fwd_lcore),
1234                                                RTE_CACHE_LINE_SIZE);
1235                 if (fwd_lcores[lc_id] == NULL) {
1236                         rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1237                                                                 "failed\n");
1238                 }
1239                 fwd_lcores[lc_id]->cpuid_idx = lc_id;
1240         }
1241
1242         RTE_ETH_FOREACH_DEV(pid) {
1243                 port = &ports[pid];
1244                 /* Apply default TxRx configuration for all ports */
1245                 port->dev_conf.txmode = tx_mode;
1246                 port->dev_conf.rxmode = rx_mode;
1247
1248                 ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1249                 if (ret != 0)
1250                         rte_exit(EXIT_FAILURE,
1251                                  "rte_eth_dev_info_get() failed\n");
1252
1253                 if (!(port->dev_info.tx_offload_capa &
1254                       DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1255                         port->dev_conf.txmode.offloads &=
1256                                 ~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1257                 if (numa_support) {
1258                         if (port_numa[pid] != NUMA_NO_CONFIG)
1259                                 port_per_socket[port_numa[pid]]++;
1260                         else {
1261                                 uint32_t socket_id = rte_eth_dev_socket_id(pid);
1262
1263                                 /*
1264                                  * if socket_id is invalid,
1265                                  * set to the first available socket.
1266                                  */
1267                                 if (check_socket_id(socket_id) < 0)
1268                                         socket_id = socket_ids[0];
1269                                 port_per_socket[socket_id]++;
1270                         }
1271                 }
1272
1273                 /* Apply Rx offloads configuration */
1274                 for (k = 0; k < port->dev_info.max_rx_queues; k++)
1275                         port->rx_conf[k].offloads =
1276                                 port->dev_conf.rxmode.offloads;
1277                 /* Apply Tx offloads configuration */
1278                 for (k = 0; k < port->dev_info.max_tx_queues; k++)
1279                         port->tx_conf[k].offloads =
1280                                 port->dev_conf.txmode.offloads;
1281
1282                 /* set flag to initialize port/queue */
1283                 port->need_reconfig = 1;
1284                 port->need_reconfig_queues = 1;
1285                 port->tx_metadata = 0;
1286
1287                 /* Check for maximum number of segments per MTU. Accordingly
1288                  * update the mbuf data size.
1289                  */
1290                 if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1291                                 port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1292                         data_size = rx_mode.max_rx_pkt_len /
1293                                 port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1294
1295                         if ((data_size + RTE_PKTMBUF_HEADROOM) >
1296                                                         mbuf_data_size) {
1297                                 mbuf_data_size = data_size +
1298                                                  RTE_PKTMBUF_HEADROOM;
1299                                 warning = 1;
1300                         }
1301                 }
1302         }
1303
1304         if (warning)
1305                 TESTPMD_LOG(WARNING, "Configured mbuf size %hu\n",
1306                             mbuf_data_size);
1307
1308         /*
1309          * Create pools of mbuf.
1310          * If NUMA support is disabled, create a single pool of mbuf in
1311          * socket 0 memory by default.
1312          * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1313          *
1314          * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1315          * nb_txd can be configured at run time.
1316          */
1317         if (param_total_num_mbufs)
1318                 nb_mbuf_per_pool = param_total_num_mbufs;
1319         else {
1320                 nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1321                         (nb_lcores * mb_mempool_cache) +
1322                         RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1323                 nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1324         }
1325
1326         if (numa_support) {
1327                 uint8_t i;
1328
1329                 for (i = 0; i < num_sockets; i++)
1330                         mempools[i] = mbuf_pool_create(mbuf_data_size,
1331                                                        nb_mbuf_per_pool,
1332                                                        socket_ids[i]);
1333         } else {
1334                 if (socket_num == UMA_NO_CONFIG)
1335                         mempools[0] = mbuf_pool_create(mbuf_data_size,
1336                                                        nb_mbuf_per_pool, 0);
1337                 else
1338                         mempools[socket_num] = mbuf_pool_create
1339                                                         (mbuf_data_size,
1340                                                          nb_mbuf_per_pool,
1341                                                          socket_num);
1342         }
1343
1344         init_port_config();
1345
1346         gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1347                 DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1348         /*
1349          * Record which mbuf pool is used by each logical core, if needed.
1350          */
1351         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1352                 mbp = mbuf_pool_find(
1353                         rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]));
1354
1355                 if (mbp == NULL)
1356                         mbp = mbuf_pool_find(0);
1357                 fwd_lcores[lc_id]->mbp = mbp;
1358                 /* initialize GSO context */
1359                 fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1360                 fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1361                 fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1362                 fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1363                         RTE_ETHER_CRC_LEN;
1364                 fwd_lcores[lc_id]->gso_ctx.flag = 0;
1365         }
1366
1367         /* Configuration of packet forwarding streams. */
1368         if (init_fwd_streams() < 0)
1369                 rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n");
1370
1371         fwd_config_setup();
1372
1373         /* create a gro context for each lcore */
1374         gro_param.gro_types = RTE_GRO_TCP_IPV4;
1375         gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1376         gro_param.max_item_per_flow = MAX_PKT_BURST;
1377         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1378                 gro_param.socket_id = rte_lcore_to_socket_id(
1379                                 fwd_lcores_cpuids[lc_id]);
1380                 fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1381                 if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1382                         rte_exit(EXIT_FAILURE,
1383                                         "rte_gro_ctx_create() failed\n");
1384                 }
1385         }
1386
1387 #if defined RTE_LIBRTE_PMD_SOFTNIC
1388         if (strcmp(cur_fwd_eng->fwd_mode_name, "softnic") == 0) {
1389                 RTE_ETH_FOREACH_DEV(pid) {
1390                         port = &ports[pid];
1391                         const char *driver = port->dev_info.driver_name;
1392
1393                         if (strcmp(driver, "net_softnic") == 0)
1394                                 port->softport.fwd_lcore_arg = fwd_lcores;
1395                 }
1396         }
1397 #endif
1398
1399 }
1400
1401
1402 void
1403 reconfig(portid_t new_port_id, unsigned socket_id)
1404 {
1405         struct rte_port *port;
1406         int ret;
1407
1408         /* Reconfiguration of Ethernet ports. */
1409         port = &ports[new_port_id];
1410
1411         ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info);
1412         if (ret != 0)
1413                 return;
1414
1415         /* set flag to initialize port/queue */
1416         port->need_reconfig = 1;
1417         port->need_reconfig_queues = 1;
1418         port->socket_id = socket_id;
1419
1420         init_port_config();
1421 }
1422
1423
1424 int
1425 init_fwd_streams(void)
1426 {
1427         portid_t pid;
1428         struct rte_port *port;
1429         streamid_t sm_id, nb_fwd_streams_new;
1430         queueid_t q;
1431
1432         /* Set the socket ID according to whether NUMA is enabled */
1433         RTE_ETH_FOREACH_DEV(pid) {
1434                 port = &ports[pid];
1435                 if (nb_rxq > port->dev_info.max_rx_queues) {
1436                         printf("Fail: nb_rxq(%d) is greater than "
1437                                 "max_rx_queues(%d)\n", nb_rxq,
1438                                 port->dev_info.max_rx_queues);
1439                         return -1;
1440                 }
1441                 if (nb_txq > port->dev_info.max_tx_queues) {
1442                         printf("Fail: nb_txq(%d) is greater than "
1443                                 "max_tx_queues(%d)\n", nb_txq,
1444                                 port->dev_info.max_tx_queues);
1445                         return -1;
1446                 }
1447                 if (numa_support) {
1448                         if (port_numa[pid] != NUMA_NO_CONFIG)
1449                                 port->socket_id = port_numa[pid];
1450                         else {
1451                                 port->socket_id = rte_eth_dev_socket_id(pid);
1452
1453                                 /*
1454                                  * if socket_id is invalid,
1455                                  * set to the first available socket.
1456                                  */
1457                                 if (check_socket_id(port->socket_id) < 0)
1458                                         port->socket_id = socket_ids[0];
1459                         }
1460                 }
1461                 else {
1462                         if (socket_num == UMA_NO_CONFIG)
1463                                 port->socket_id = 0;
1464                         else
1465                                 port->socket_id = socket_num;
1466                 }
1467         }
1468
1469         q = RTE_MAX(nb_rxq, nb_txq);
1470         if (q == 0) {
1471                 printf("Fail: Cannot allocate fwd streams as number of queues is 0\n");
1472                 return -1;
1473         }
1474         nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1475         if (nb_fwd_streams_new == nb_fwd_streams)
1476                 return 0;
1477         /* clear the old */
1478         if (fwd_streams != NULL) {
1479                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1480                         if (fwd_streams[sm_id] == NULL)
1481                                 continue;
1482                         rte_free(fwd_streams[sm_id]);
1483                         fwd_streams[sm_id] = NULL;
1484                 }
1485                 rte_free(fwd_streams);
1486                 fwd_streams = NULL;
1487         }
1488
1489         /* init new */
1490         nb_fwd_streams = nb_fwd_streams_new;
1491         if (nb_fwd_streams) {
1492                 fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1493                         sizeof(struct fwd_stream *) * nb_fwd_streams,
1494                         RTE_CACHE_LINE_SIZE);
1495                 if (fwd_streams == NULL)
1496                         rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1497                                  " (struct fwd_stream *)) failed\n",
1498                                  nb_fwd_streams);
1499
1500                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1501                         fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1502                                 " struct fwd_stream", sizeof(struct fwd_stream),
1503                                 RTE_CACHE_LINE_SIZE);
1504                         if (fwd_streams[sm_id] == NULL)
1505                                 rte_exit(EXIT_FAILURE, "rte_zmalloc"
1506                                          "(struct fwd_stream) failed\n");
1507                 }
1508         }
1509
1510         return 0;
1511 }
1512
1513 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1514 static void
1515 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1516 {
1517         unsigned int total_burst;
1518         unsigned int nb_burst;
1519         unsigned int burst_stats[3];
1520         uint16_t pktnb_stats[3];
1521         uint16_t nb_pkt;
1522         int burst_percent[3];
1523
1524         /*
1525          * First compute the total number of packet bursts and the
1526          * two highest numbers of bursts of the same number of packets.
1527          */
1528         total_burst = 0;
1529         burst_stats[0] = burst_stats[1] = burst_stats[2] = 0;
1530         pktnb_stats[0] = pktnb_stats[1] = pktnb_stats[2] = 0;
1531         for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1532                 nb_burst = pbs->pkt_burst_spread[nb_pkt];
1533                 if (nb_burst == 0)
1534                         continue;
1535                 total_burst += nb_burst;
1536                 if (nb_burst > burst_stats[0]) {
1537                         burst_stats[1] = burst_stats[0];
1538                         pktnb_stats[1] = pktnb_stats[0];
1539                         burst_stats[0] = nb_burst;
1540                         pktnb_stats[0] = nb_pkt;
1541                 } else if (nb_burst > burst_stats[1]) {
1542                         burst_stats[1] = nb_burst;
1543                         pktnb_stats[1] = nb_pkt;
1544                 }
1545         }
1546         if (total_burst == 0)
1547                 return;
1548         burst_percent[0] = (burst_stats[0] * 100) / total_burst;
1549         printf("  %s-bursts : %u [%d%% of %d pkts", rx_tx, total_burst,
1550                burst_percent[0], (int) pktnb_stats[0]);
1551         if (burst_stats[0] == total_burst) {
1552                 printf("]\n");
1553                 return;
1554         }
1555         if (burst_stats[0] + burst_stats[1] == total_burst) {
1556                 printf(" + %d%% of %d pkts]\n",
1557                        100 - burst_percent[0], pktnb_stats[1]);
1558                 return;
1559         }
1560         burst_percent[1] = (burst_stats[1] * 100) / total_burst;
1561         burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]);
1562         if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) {
1563                 printf(" + %d%% of others]\n", 100 - burst_percent[0]);
1564                 return;
1565         }
1566         printf(" + %d%% of %d pkts + %d%% of others]\n",
1567                burst_percent[1], (int) pktnb_stats[1], burst_percent[2]);
1568 }
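
/*
 * Illustrative output of pkt_burst_stats_display() (hypothetical values):
 *
 *   RX-bursts : 10000 [95% of 32 pkts + 5% of 1 pkts]
 *
 * i.e. 95% of the 10000 Rx bursts carried 32 packets and the remaining
 * 5% carried a single packet.
 */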
1569 #endif /* RTE_TEST_PMD_RECORD_BURST_STATS */
1570
1571 static void
1572 fwd_stream_stats_display(streamid_t stream_id)
1573 {
1574         struct fwd_stream *fs;
1575         static const char *fwd_top_stats_border = "-------";
1576
1577         fs = fwd_streams[stream_id];
1578         if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1579             (fs->fwd_dropped == 0))
1580                 return;
1581         printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1582                "TX Port=%2d/Queue=%2d %s\n",
1583                fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1584                fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1585         printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1586                " TX-dropped: %-14"PRIu64,
1587                fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1588
1589         /* if checksum mode */
1590         if (cur_fwd_eng == &csum_fwd_engine) {
1591                 printf("  RX-bad IP checksum: %-14"PRIu64
1592                        "  RX-bad L4 checksum: %-14"PRIu64
1593                        " RX-bad outer L4 checksum: %-14"PRIu64"\n",
1594                         fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1595                         fs->rx_bad_outer_l4_csum);
1596         } else {
1597                 printf("\n");
1598         }
1599
1600 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1601         pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1602         pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1603 #endif
1604 }
1605
1606 void
1607 fwd_stats_display(void)
1608 {
1609         static const char *fwd_stats_border = "----------------------";
1610         static const char *acc_stats_border = "+++++++++++++++";
1611         struct {
1612                 struct fwd_stream *rx_stream;
1613                 struct fwd_stream *tx_stream;
1614                 uint64_t tx_dropped;
1615                 uint64_t rx_bad_ip_csum;
1616                 uint64_t rx_bad_l4_csum;
1617                 uint64_t rx_bad_outer_l4_csum;
1618         } ports_stats[RTE_MAX_ETHPORTS];
1619         uint64_t total_rx_dropped = 0;
1620         uint64_t total_tx_dropped = 0;
1621         uint64_t total_rx_nombuf = 0;
1622         struct rte_eth_stats stats;
1623 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1624         uint64_t fwd_cycles = 0;
1625 #endif
1626         uint64_t total_recv = 0;
1627         uint64_t total_xmit = 0;
1628         struct rte_port *port;
1629         streamid_t sm_id;
1630         portid_t pt_id;
1631         int i;
1632
1633         memset(ports_stats, 0, sizeof(ports_stats));
1634
1635         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1636                 struct fwd_stream *fs = fwd_streams[sm_id];
1637
1638                 if (cur_fwd_config.nb_fwd_streams >
1639                     cur_fwd_config.nb_fwd_ports) {
1640                         fwd_stream_stats_display(sm_id);
1641                 } else {
1642                         ports_stats[fs->tx_port].tx_stream = fs;
1643                         ports_stats[fs->rx_port].rx_stream = fs;
1644                 }
1645
1646                 ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1647
1648                 ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1649                 ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1650                 ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1651                                 fs->rx_bad_outer_l4_csum;
1652
1653 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1654                 fwd_cycles += fs->core_cycles;
1655 #endif
1656         }
1657         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1658                 uint8_t j;
1659
1660                 pt_id = fwd_ports_ids[i];
1661                 port = &ports[pt_id];
1662
1663                 rte_eth_stats_get(pt_id, &stats);
1664                 stats.ipackets -= port->stats.ipackets;
1665                 stats.opackets -= port->stats.opackets;
1666                 stats.ibytes -= port->stats.ibytes;
1667                 stats.obytes -= port->stats.obytes;
1668                 stats.imissed -= port->stats.imissed;
1669                 stats.oerrors -= port->stats.oerrors;
1670                 stats.rx_nombuf -= port->stats.rx_nombuf;
1671
1672                 total_recv += stats.ipackets;
1673                 total_xmit += stats.opackets;
1674                 total_rx_dropped += stats.imissed;
1675                 total_tx_dropped += ports_stats[pt_id].tx_dropped;
1676                 total_tx_dropped += stats.oerrors;
1677                 total_rx_nombuf  += stats.rx_nombuf;
1678
1679                 printf("\n  %s Forward statistics for port %-2d %s\n",
1680                        fwd_stats_border, pt_id, fwd_stats_border);
1681
1682                 if (!port->rx_queue_stats_mapping_enabled &&
1683                     !port->tx_queue_stats_mapping_enabled) {
1684                         printf("  RX-packets: %-14"PRIu64
1685                                " RX-dropped: %-14"PRIu64
1686                                "RX-total: %-"PRIu64"\n",
1687                                stats.ipackets, stats.imissed,
1688                                stats.ipackets + stats.imissed);
1689
1690                         if (cur_fwd_eng == &csum_fwd_engine)
1691                                 printf("  Bad-ipcsum: %-14"PRIu64
1692                                        " Bad-l4csum: %-14"PRIu64
1693                                        "Bad-outer-l4csum: %-14"PRIu64"\n",
1694                                        ports_stats[pt_id].rx_bad_ip_csum,
1695                                        ports_stats[pt_id].rx_bad_l4_csum,
1696                                        ports_stats[pt_id].rx_bad_outer_l4_csum);
1697                         if (stats.ierrors + stats.rx_nombuf > 0) {
1698                                 printf("  RX-error: %-"PRIu64"\n",
1699                                        stats.ierrors);
1700                                 printf("  RX-nombufs: %-14"PRIu64"\n",
1701                                        stats.rx_nombuf);
1702                         }
1703
1704                         printf("  TX-packets: %-14"PRIu64
1705                                " TX-dropped: %-14"PRIu64
1706                                "TX-total: %-"PRIu64"\n",
1707                                stats.opackets, ports_stats[pt_id].tx_dropped,
1708                                stats.opackets + ports_stats[pt_id].tx_dropped);
1709                 } else {
1710                         printf("  RX-packets:             %14"PRIu64
1711                                "    RX-dropped:%14"PRIu64
1712                                "    RX-total:%14"PRIu64"\n",
1713                                stats.ipackets, stats.imissed,
1714                                stats.ipackets + stats.imissed);
1715
1716                         if (cur_fwd_eng == &csum_fwd_engine)
1717                                 printf("  Bad-ipcsum:%14"PRIu64
1718                                        "    Bad-l4csum:%14"PRIu64
1719                                        "    Bad-outer-l4csum: %-14"PRIu64"\n",
1720                                        ports_stats[pt_id].rx_bad_ip_csum,
1721                                        ports_stats[pt_id].rx_bad_l4_csum,
1722                                        ports_stats[pt_id].rx_bad_outer_l4_csum);
1723                         if ((stats.ierrors + stats.rx_nombuf) > 0) {
1724                                 printf("  RX-error:%"PRIu64"\n", stats.ierrors);
1725                                 printf("  RX-nombufs:             %14"PRIu64"\n",
1726                                        stats.rx_nombuf);
1727                         }
1728
1729                         printf("  TX-packets:             %14"PRIu64
1730                                "    TX-dropped:%14"PRIu64
1731                                "    TX-total:%14"PRIu64"\n",
1732                                stats.opackets, ports_stats[pt_id].tx_dropped,
1733                                stats.opackets + ports_stats[pt_id].tx_dropped);
1734                 }
1735
1736 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1737                 if (ports_stats[pt_id].rx_stream)
1738                         pkt_burst_stats_display("RX",
1739                                 &ports_stats[pt_id].rx_stream->rx_burst_stats);
1740                 if (ports_stats[pt_id].tx_stream)
1741                         pkt_burst_stats_display("TX",
1742                                 &ports_stats[pt_id].tx_stream->tx_burst_stats);
1743 #endif
1744
1745                 if (port->rx_queue_stats_mapping_enabled) {
1746                         printf("\n");
1747                         for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1748                                 printf("  Stats reg %2d RX-packets:%14"PRIu64
1749                                        "     RX-errors:%14"PRIu64
1750                                        "    RX-bytes:%14"PRIu64"\n",
1751                                        j, stats.q_ipackets[j],
1752                                        stats.q_errors[j], stats.q_ibytes[j]);
1753                         }
1754                         printf("\n");
1755                 }
1756                 if (port->tx_queue_stats_mapping_enabled) {
1757                         for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1758                                 printf("  Stats reg %2d TX-packets:%14"PRIu64
1759                                        "                                 TX-bytes:%14"
1760                                        PRIu64"\n",
1761                                        j, stats.q_opackets[j],
1762                                        stats.q_obytes[j]);
1763                         }
1764                 }
1765
1766                 printf("  %s--------------------------------%s\n",
1767                        fwd_stats_border, fwd_stats_border);
1768         }
1769
1770         printf("\n  %s Accumulated forward statistics for all ports"
1771                "%s\n",
1772                acc_stats_border, acc_stats_border);
1773         printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
1774                "%-"PRIu64"\n"
1775                "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
1776                "%-"PRIu64"\n",
1777                total_recv, total_rx_dropped, total_recv + total_rx_dropped,
1778                total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
1779         if (total_rx_nombuf > 0)
1780                 printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
1781         printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
1782                "%s\n",
1783                acc_stats_border, acc_stats_border);
1784 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1785         if (total_recv > 0)
1786                 printf("\n  CPU cycles/packet=%u (total cycles="
1787                        "%"PRIu64" / total RX packets=%"PRIu64")\n",
1788                        (unsigned int)(fwd_cycles / total_recv),
1789                        fwd_cycles, total_recv);
1790 #endif
1791 }
1792
1793 void
1794 fwd_stats_reset(void)
1795 {
1796         streamid_t sm_id;
1797         portid_t pt_id;
1798         int i;
1799
1800         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1801                 pt_id = fwd_ports_ids[i];
1802                 rte_eth_stats_get(pt_id, &ports[pt_id].stats);
1803         }
1804         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1805                 struct fwd_stream *fs = fwd_streams[sm_id];
1806
1807                 fs->rx_packets = 0;
1808                 fs->tx_packets = 0;
1809                 fs->fwd_dropped = 0;
1810                 fs->rx_bad_ip_csum = 0;
1811                 fs->rx_bad_l4_csum = 0;
1812                 fs->rx_bad_outer_l4_csum = 0;
1813
1814 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1815                 memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
1816                 memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
1817 #endif
1818 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1819                 fs->core_cycles = 0;
1820 #endif
1821         }
1822 }
1823
1824 static void
1825 flush_fwd_rx_queues(void)
1826 {
1827         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1828         portid_t  rxp;
1829         portid_t port_id;
1830         queueid_t rxq;
1831         uint16_t  nb_rx;
1832         uint16_t  i;
1833         uint8_t   j;
1834         uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
1835         uint64_t timer_period;
1836
1837         /* convert to number of cycles */
1838         timer_period = rte_get_timer_hz(); /* 1 second timeout */
1839
1840         for (j = 0; j < 2; j++) {
1841                 for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
1842                         for (rxq = 0; rxq < nb_rxq; rxq++) {
1843                                 port_id = fwd_ports_ids[rxp];
1844                                 /*
1845                                  * testpmd can get stuck in the do-while loop
1846                                  * below if rte_eth_rx_burst() keeps returning
1847                                  * packets, so a timer is used to exit the
1848                                  * loop once the 1 second timeout expires.
1849                                  */
1850                                 prev_tsc = rte_rdtsc();
1851                                 do {
1852                                         nb_rx = rte_eth_rx_burst(port_id, rxq,
1853                                                 pkts_burst, MAX_PKT_BURST);
1854                                         for (i = 0; i < nb_rx; i++)
1855                                                 rte_pktmbuf_free(pkts_burst[i]);
1856
1857                                         cur_tsc = rte_rdtsc();
1858                                         diff_tsc = cur_tsc - prev_tsc;
1859                                         timer_tsc += diff_tsc;
1860                                 } while ((nb_rx > 0) &&
1861                                         (timer_tsc < timer_period));
1862                                 timer_tsc = 0;
1863                         }
1864                 }
1865                 rte_delay_ms(10); /* wait 10 milliseconds before retrying */
1866         }
1867 }
1868
1869 static void
1870 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
1871 {
1872         struct fwd_stream **fsm;
1873         streamid_t nb_fs;
1874         streamid_t sm_id;
1875 #ifdef RTE_LIBRTE_BITRATE
1876         uint64_t tics_per_1sec;
1877         uint64_t tics_datum;
1878         uint64_t tics_current;
1879         uint16_t i, cnt_ports;
1880
1881         cnt_ports = nb_ports;
1882         tics_datum = rte_rdtsc();
1883         tics_per_1sec = rte_get_timer_hz();
1884 #endif
1885         fsm = &fwd_streams[fc->stream_idx];
1886         nb_fs = fc->stream_nb;
1887         do {
1888                 for (sm_id = 0; sm_id < nb_fs; sm_id++)
1889                         (*pkt_fwd)(fsm[sm_id]);
1890 #ifdef RTE_LIBRTE_BITRATE
1891                 if (bitrate_enabled != 0 &&
1892                                 bitrate_lcore_id == rte_lcore_id()) {
1893                         tics_current = rte_rdtsc();
1894                         if (tics_current - tics_datum >= tics_per_1sec) {
1895                                 /* Periodic bitrate calculation */
1896                                 for (i = 0; i < cnt_ports; i++)
1897                                         rte_stats_bitrate_calc(bitrate_data,
1898                                                 ports_ids[i]);
1899                                 tics_datum = tics_current;
1900                         }
1901                 }
1902 #endif
1903 #ifdef RTE_LIBRTE_LATENCY_STATS
1904                 if (latencystats_enabled != 0 &&
1905                                 latencystats_lcore_id == rte_lcore_id())
1906                         rte_latencystats_update();
1907 #endif
1908
1909         } while (! fc->stopped);
1910 }
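
/*
 * Illustrative sketch only (hypothetical callback, not part of testpmd):
 * the minimal shape of a packet_fwd_t handler as dispatched by
 * run_pkt_fwd_on_lcore() above - receive one burst on the stream's Rx
 * queue and drop it.
 */
static __rte_unused void
pkt_burst_drop(struct fwd_stream *fs)
{
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        uint16_t nb_rx;
        uint16_t i;

        nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
                                 MAX_PKT_BURST);
        fs->rx_packets += nb_rx;
        for (i = 0; i < nb_rx; i++)
                rte_pktmbuf_free(pkts_burst[i]);
}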
1911
1912 static int
1913 start_pkt_forward_on_core(void *fwd_arg)
1914 {
1915         run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
1916                              cur_fwd_config.fwd_eng->packet_fwd);
1917         return 0;
1918 }
1919
1920 /*
1921  * Run the TXONLY packet forwarding engine to send a single burst of packets.
1922  * Used to start communication flows in network loopback test configurations.
1923  */
1924 static int
1925 run_one_txonly_burst_on_core(void *fwd_arg)
1926 {
1927         struct fwd_lcore *fwd_lc;
1928         struct fwd_lcore tmp_lcore;
1929
1930         fwd_lc = (struct fwd_lcore *) fwd_arg;
1931         tmp_lcore = *fwd_lc;
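        /*
         * The copied context is marked as already stopped so that
         * run_pkt_fwd_on_lcore() runs its do-while body exactly once,
         * i.e. sends a single burst on every stream, then returns.
         */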
1932         tmp_lcore.stopped = 1;
1933         run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
1934         return 0;
1935 }
1936
1937 /*
1938  * Launch packet forwarding:
1939  *     - Setup per-port forwarding context.
1940  *     - launch logical cores with their forwarding configuration.
1941  */
1942 static void
1943 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
1944 {
1945         port_fwd_begin_t port_fwd_begin;
1946         unsigned int i;
1947         unsigned int lc_id;
1948         int diag;
1949
1950         port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
1951         if (port_fwd_begin != NULL) {
1952                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
1953                         (*port_fwd_begin)(fwd_ports_ids[i]);
1954         }
1955         for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
1956                 lc_id = fwd_lcores_cpuids[i];
1957                 if ((interactive == 0) || (lc_id != rte_lcore_id())) {
1958                         fwd_lcores[i]->stopped = 0;
1959                         diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
1960                                                      fwd_lcores[i], lc_id);
1961                         if (diag != 0)
1962                                 printf("launch lcore %u failed - diag=%d\n",
1963                                        lc_id, diag);
1964                 }
1965         }
1966 }
1967
1968 /*
1969  * Launch packet forwarding configuration.
1970  */
1971 void
1972 start_packet_forwarding(int with_tx_first)
1973 {
1974         port_fwd_begin_t port_fwd_begin;
1975         port_fwd_end_t  port_fwd_end;
1976         struct rte_port *port;
1977         unsigned int i;
1978         portid_t   pt_id;
1979
1980         if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
1981                 rte_exit(EXIT_FAILURE, "rxq is 0, cannot use rxonly fwd mode\n");
1982
1983         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
1984                 rte_exit(EXIT_FAILURE, "txq is 0, cannot use txonly fwd mode\n");
1985
1986         if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
1987                 strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
1988                 (!nb_rxq || !nb_txq))
1989                 rte_exit(EXIT_FAILURE,
1990                         "Either rxq or txq is 0, cannot use %s fwd mode\n",
1991                         cur_fwd_eng->fwd_mode_name);
1992
1993         if (all_ports_started() == 0) {
1994                 printf("Not all ports were started\n");
1995                 return;
1996         }
1997         if (test_done == 0) {
1998                 printf("Packet forwarding already started\n");
1999                 return;
2000         }
2001
2003         if (dcb_test) {
2004                 for (i = 0; i < nb_fwd_ports; i++) {
2005                         pt_id = fwd_ports_ids[i];
2006                         port = &ports[pt_id];
2007                         if (!port->dcb_flag) {
2008                                 printf("In DCB mode, all forwarding ports must "
2009                                        "be configured in this mode.\n");
2010                                 return;
2011                         }
2012                 }
2013                 if (nb_fwd_lcores == 1) {
2014                         printf("In DCB mode, the number of forwarding "
2015                                "cores must be larger than 1.\n");
2016                         return;
2017                 }
2018         }
2019         test_done = 0;
2020
2021         fwd_config_setup();
2022
2023         if (!no_flush_rx)
2024                 flush_fwd_rx_queues();
2025
2026         pkt_fwd_config_display(&cur_fwd_config);
2027         rxtx_config_display();
2028
2029         fwd_stats_reset();
2030         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2031                 pt_id = fwd_ports_ids[i];
2032                 port = &ports[pt_id];
2033                 map_port_queue_stats_mapping_registers(pt_id, port);
2034         }
2035         if (with_tx_first) {
2036                 port_fwd_begin = tx_only_engine.port_fwd_begin;
2037                 if (port_fwd_begin != NULL) {
2038                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2039                                 (*port_fwd_begin)(fwd_ports_ids[i]);
2040                 }
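                /*
                 * Run the Tx-only engine for "with_tx_first" iterations
                 * (one burst per stream each time, see
                 * run_one_txonly_burst_on_core()) before launching the
                 * configured forwarding engine.
                 */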
2041                 while (with_tx_first--) {
2042                         launch_packet_forwarding(
2043                                         run_one_txonly_burst_on_core);
2044                         rte_eal_mp_wait_lcore();
2045                 }
2046                 port_fwd_end = tx_only_engine.port_fwd_end;
2047                 if (port_fwd_end != NULL) {
2048                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2049                                 (*port_fwd_end)(fwd_ports_ids[i]);
2050                 }
2051         }
2052         launch_packet_forwarding(start_pkt_forward_on_core);
2053 }
2054
2055 void
2056 stop_packet_forwarding(void)
2057 {
2058         port_fwd_end_t port_fwd_end;
2059         lcoreid_t lc_id;
2060         portid_t pt_id;
2061         int i;
2062
2063         if (test_done) {
2064                 printf("Packet forwarding not started\n");
2065                 return;
2066         }
2067         printf("Telling cores to stop...");
2068         for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2069                 fwd_lcores[lc_id]->stopped = 1;
2070         printf("\nWaiting for lcores to finish...\n");
2071         rte_eal_mp_wait_lcore();
2072         port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2073         if (port_fwd_end != NULL) {
2074                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2075                         pt_id = fwd_ports_ids[i];
2076                         (*port_fwd_end)(pt_id);
2077                 }
2078         }
2079
2080         fwd_stats_display();
2081
2082         printf("\nDone.\n");
2083         test_done = 1;
2084 }
2085
2086 void
2087 dev_set_link_up(portid_t pid)
2088 {
2089         if (rte_eth_dev_set_link_up(pid) < 0)
2090                 printf("\nFailed to set link up.\n");
2091 }
2092
2093 void
2094 dev_set_link_down(portid_t pid)
2095 {
2096         if (rte_eth_dev_set_link_down(pid) < 0)
2097                 printf("\nFailed to set link down.\n");
2098 }
2099
2100 static int
2101 all_ports_started(void)
2102 {
2103         portid_t pi;
2104         struct rte_port *port;
2105
2106         RTE_ETH_FOREACH_DEV(pi) {
2107                 port = &ports[pi];
2108                 /* Check if there is a port which is not started */
2109                 if ((port->port_status != RTE_PORT_STARTED) &&
2110                         (port->slave_flag == 0))
2111                         return 0;
2112         }
2113
2114         /* All ports are started */
2115         return 1;
2116 }
2117
2118 int
2119 port_is_stopped(portid_t port_id)
2120 {
2121         struct rte_port *port = &ports[port_id];
2122
2123         if ((port->port_status != RTE_PORT_STOPPED) &&
2124             (port->slave_flag == 0))
2125                 return 0;
2126         return 1;
2127 }
2128
2129 int
2130 all_ports_stopped(void)
2131 {
2132         portid_t pi;
2133
2134         RTE_ETH_FOREACH_DEV(pi) {
2135                 if (!port_is_stopped(pi))
2136                         return 0;
2137         }
2138
2139         return 1;
2140 }
2141
2142 int
2143 port_is_started(portid_t port_id)
2144 {
2145         if (port_id_is_invalid(port_id, ENABLED_WARN))
2146                 return 0;
2147
2148         if (ports[port_id].port_status != RTE_PORT_STARTED)
2149                 return 0;
2150
2151         return 1;
2152 }
2153
2154 /* Configure the Rx and Tx hairpin queues for the selected port. */
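/*
 * For example (illustrative values), with nb_rxq = 2, nb_txq = 2 and
 * nb_hairpinq = 2, queues 2..3 in each direction are the hairpin queues:
 * Tx hairpin queues 2..3 are peered with Rx queues 2..3 of the same port
 * and vice versa.
 */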
2155 static int
2156 setup_hairpin_queues(portid_t pi)
2157 {
2158         queueid_t qi;
2159         struct rte_eth_hairpin_conf hairpin_conf = {
2160                 .peer_count = 1,
2161         };
2162         int i;
2163         int diag;
2164         struct rte_port *port = &ports[pi];
2165
2166         for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2167                 hairpin_conf.peers[0].port = pi;
2168                 hairpin_conf.peers[0].queue = i + nb_rxq;
2169                 diag = rte_eth_tx_hairpin_queue_setup
2170                         (pi, qi, nb_txd, &hairpin_conf);
2171                 i++;
2172                 if (diag == 0)
2173                         continue;
2174
2175                 /* Failed to set up Tx hairpin queue, return */
2176                 if (rte_atomic16_cmpset(&(port->port_status),
2177                                         RTE_PORT_HANDLING,
2178                                         RTE_PORT_STOPPED) == 0)
2179                         printf("Port %d cannot be set back "
2180                                         "to stopped\n", pi);
2181                 printf("Failed to configure port %d hairpin "
2182                                 "queues\n", pi);
2183                 /* try to reconfigure queues next time */
2184                 port->need_reconfig_queues = 1;
2185                 return -1;
2186         }
2187         for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2188                 hairpin_conf.peers[0].port = pi;
2189                 hairpin_conf.peers[0].queue = i + nb_txq;
2190                 diag = rte_eth_rx_hairpin_queue_setup
2191                         (pi, qi, nb_rxd, &hairpin_conf);
2192                 i++;
2193                 if (diag == 0)
2194                         continue;
2195
2196                 /* Failed to set up Rx hairpin queue, return */
2197                 if (rte_atomic16_cmpset(&(port->port_status),
2198                                         RTE_PORT_HANDLING,
2199                                         RTE_PORT_STOPPED) == 0)
2200                         printf("Port %d cannot be set back "
2201                                         "to stopped\n", pi);
2202                 printf("Failed to configure port %d hairpin "
2203                                 "queues\n", pi);
2204                 /* try to reconfigure queues next time */
2205                 port->need_reconfig_queues = 1;
2206                 return -1;
2207         }
2208         return 0;
2209 }
2210
2211 int
2212 start_port(portid_t pid)
2213 {
2214         int diag, need_check_link_status = -1;
2215         portid_t pi;
2216         queueid_t qi;
2217         struct rte_port *port;
2218         struct rte_ether_addr mac_addr;
2219         struct rte_eth_hairpin_cap cap;
2220
2221         if (port_id_is_invalid(pid, ENABLED_WARN))
2222                 return 0;
2223
2224         if (dcb_config)
2225                 dcb_test = 1;
2226         RTE_ETH_FOREACH_DEV(pi) {
2227                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2228                         continue;
2229
2230                 need_check_link_status = 0;
2231                 port = &ports[pi];
2232                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2233                                                  RTE_PORT_HANDLING) == 0) {
2234                         printf("Port %d is not stopped\n", pi);
2235                         continue;
2236                 }
2237
2238                 if (port->need_reconfig > 0) {
2239                         port->need_reconfig = 0;
2240
2241                         if (flow_isolate_all) {
2242                                 int ret = port_flow_isolate(pi, 1);
2243                                 if (ret) {
2244                                         printf("Failed to apply isolated"
2245                                                " mode on port %d\n", pi);
2246                                         return -1;
2247                                 }
2248                         }
2249                         configure_rxtx_dump_callbacks(0);
2250                         printf("Configuring Port %d (socket %u)\n", pi,
2251                                         port->socket_id);
2252                         if (nb_hairpinq > 0 &&
2253                             rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2254                                 printf("Port %d doesn't support hairpin "
2255                                        "queues\n", pi);
2256                                 return -1;
2257                         }
2258                         /* configure port */
2259                         diag = rte_eth_dev_configure(pi, nb_rxq + nb_hairpinq,
2260                                                      nb_txq + nb_hairpinq,
2261                                                      &(port->dev_conf));
2262                         if (diag != 0) {
2263                                 if (rte_atomic16_cmpset(&(port->port_status),
2264                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2265                                         printf("Port %d cannot be set back "
2266                                                         "to stopped\n", pi);
2267                                 printf("Failed to configure port %d\n", pi);
2268                                 /* try to reconfigure port next time */
2269                                 port->need_reconfig = 1;
2270                                 return -1;
2271                         }
2272                 }
2273                 if (port->need_reconfig_queues > 0) {
2274                         port->need_reconfig_queues = 0;
2275                         /* setup tx queues */
2276                         for (qi = 0; qi < nb_txq; qi++) {
2277                                 if ((numa_support) &&
2278                                         (txring_numa[pi] != NUMA_NO_CONFIG))
2279                                         diag = rte_eth_tx_queue_setup(pi, qi,
2280                                                 port->nb_tx_desc[qi],
2281                                                 txring_numa[pi],
2282                                                 &(port->tx_conf[qi]));
2283                                 else
2284                                         diag = rte_eth_tx_queue_setup(pi, qi,
2285                                                 port->nb_tx_desc[qi],
2286                                                 port->socket_id,
2287                                                 &(port->tx_conf[qi]));
2288
2289                                 if (diag == 0)
2290                                         continue;
2291
2292                                 /* Failed to set up Tx queue, return */
2293                                 if (rte_atomic16_cmpset(&(port->port_status),
2294                                                         RTE_PORT_HANDLING,
2295                                                         RTE_PORT_STOPPED) == 0)
2296                                         printf("Port %d cannot be set back "
2297                                                         "to stopped\n", pi);
2298                                 printf("Failed to configure port %d Tx queues\n",
2299                                        pi);
2300                                 /* try to reconfigure queues next time */
2301                                 port->need_reconfig_queues = 1;
2302                                 return -1;
2303                         }
2304                         for (qi = 0; qi < nb_rxq; qi++) {
2305                                 /* setup rx queues */
2306                                 if ((numa_support) &&
2307                                         (rxring_numa[pi] != NUMA_NO_CONFIG)) {
2308                                         struct rte_mempool * mp =
2309                                                 mbuf_pool_find(rxring_numa[pi]);
2310                                         if (mp == NULL) {
2311                                                 printf("Failed to set up Rx queue:"
2312                                                         " no mempool allocated"
2313                                                         " on socket %d\n",
2314                                                         rxring_numa[pi]);
2315                                                 return -1;
2316                                         }
2317
2318                                         diag = rte_eth_rx_queue_setup(pi, qi,
2319                                              port->nb_rx_desc[qi],
2320                                              rxring_numa[pi],
2321                                              &(port->rx_conf[qi]),
2322                                              mp);
2323                                 } else {
2324                                         struct rte_mempool *mp =
2325                                                 mbuf_pool_find(port->socket_id);
2326                                         if (mp == NULL) {
2327                                                 printf("Failed to set up Rx queue:"
2328                                                         " no mempool allocated"
2329                                                         " on socket %d\n",
2330                                                         port->socket_id);
2331                                                 return -1;
2332                                         }
2333                                         diag = rte_eth_rx_queue_setup(pi, qi,
2334                                              port->nb_rx_desc[qi],
2335                                              port->socket_id,
2336                                              &(port->rx_conf[qi]),
2337                                              mp);
2338                                 }
2339                                 if (diag == 0)
2340                                         continue;
2341
2342                                 /* Failed to set up Rx queue, return */
2343                                 if (rte_atomic16_cmpset(&(port->port_status),
2344                                                         RTE_PORT_HANDLING,
2345                                                         RTE_PORT_STOPPED) == 0)
2346                                         printf("Port %d cannot be set back "
2347                                                         "to stopped\n", pi);
2348                                 printf("Failed to configure port %d Rx queues\n",
2349                                        pi);
2350                                 /* try to reconfigure queues next time */
2351                                 port->need_reconfig_queues = 1;
2352                                 return -1;
2353                         }
2354                         /* setup hairpin queues */
2355                         if (setup_hairpin_queues(pi) != 0)
2356                                 return -1;
2357                 }
2358                 configure_rxtx_dump_callbacks(verbose_level);
2359                 if (clear_ptypes) {
2360                         diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2361                                         NULL, 0);
2362                         if (diag < 0)
2363                                 printf(
2364                                 "Port %d: Failed to disable Ptype parsing\n",
2365                                 pi);
2366                 }
2367
2368                 /* start port */
2369                 if (rte_eth_dev_start(pi) < 0) {
2370                         printf("Failed to start port %d\n", pi);
2371
2372                         /* Failed to start the port, set it back to stopped */
2373                         if (rte_atomic16_cmpset(&(port->port_status),
2374                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2375                                 printf("Port %d cannot be set back to "
2376                                                         "stopped\n", pi);
2377                         continue;
2378                 }
2379
2380                 if (rte_atomic16_cmpset(&(port->port_status),
2381                         RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2382                         printf("Port %d cannot be set to started\n", pi);
2383
2384                 if (eth_macaddr_get_print_err(pi, &mac_addr) == 0)
2385                         printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi,
2386                                 mac_addr.addr_bytes[0], mac_addr.addr_bytes[1],
2387                                 mac_addr.addr_bytes[2], mac_addr.addr_bytes[3],
2388                                 mac_addr.addr_bytes[4], mac_addr.addr_bytes[5]);
2389
2390                 /* at least one port started, need to check link status */
2391                 need_check_link_status = 1;
2392         }
2393
2394         if (need_check_link_status == 1 && !no_link_check)
2395                 check_all_ports_link_status(RTE_PORT_ALL);
2396         else if (need_check_link_status == 0)
2397                 printf("Please stop the ports first\n");
2398
2399         printf("Done\n");
2400         return 0;
2401 }
2402
2403 void
2404 stop_port(portid_t pid)
2405 {
2406         portid_t pi;
2407         struct rte_port *port;
2408         int need_check_link_status = 0;
2409
2410         if (dcb_test) {
2411                 dcb_test = 0;
2412                 dcb_config = 0;
2413         }
2414
2415         if (port_id_is_invalid(pid, ENABLED_WARN))
2416                 return;
2417
2418         printf("Stopping ports...\n");
2419
2420         RTE_ETH_FOREACH_DEV(pi) {
2421                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2422                         continue;
2423
2424                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2425                         printf("Please remove port %d from forwarding configuration.\n", pi);
2426                         continue;
2427                 }
2428
2429                 if (port_is_bonding_slave(pi)) {
2430                         printf("Please remove port %d from bonded device.\n", pi);
2431                         continue;
2432                 }
2433
2434                 port = &ports[pi];
2435                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
2436                                                 RTE_PORT_HANDLING) == 0)
2437                         continue;
2438
2439                 rte_eth_dev_stop(pi);
2440
2441                 if (rte_atomic16_cmpset(&(port->port_status),
2442                         RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2443                         printf("Port %d cannot be set to stopped\n", pi);
2444                 need_check_link_status = 1;
2445         }
2446         if (need_check_link_status && !no_link_check)
2447                 check_all_ports_link_status(RTE_PORT_ALL);
2448
2449         printf("Done\n");
2450 }
2451
2452 static void
2453 remove_invalid_ports_in(portid_t *array, portid_t *total)
2454 {
2455         portid_t i;
2456         portid_t new_total = 0;
2457
2458         for (i = 0; i < *total; i++)
2459                 if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2460                         array[new_total] = array[i];
2461                         new_total++;
2462                 }
2463         *total = new_total;
2464 }
2465
2466 static void
2467 remove_invalid_ports(void)
2468 {
2469         remove_invalid_ports_in(ports_ids, &nb_ports);
2470         remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
2471         nb_cfg_ports = nb_fwd_ports;
2472 }
2473
2474 void
2475 close_port(portid_t pid)
2476 {
2477         portid_t pi;
2478         struct rte_port *port;
2479
2480         if (port_id_is_invalid(pid, ENABLED_WARN))
2481                 return;
2482
2483         printf("Closing ports...\n");
2484
2485         RTE_ETH_FOREACH_DEV(pi) {
2486                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2487                         continue;
2488
2489                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2490                         printf("Please remove port %d from forwarding configuration.\n", pi);
2491                         continue;
2492                 }
2493
2494                 if (port_is_bonding_slave(pi)) {
2495                         printf("Please remove port %d from bonded device.\n", pi);
2496                         continue;
2497                 }
2498
2499                 port = &ports[pi];
2500                 if (rte_atomic16_cmpset(&(port->port_status),
2501                         RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2502                         printf("Port %d is already closed\n", pi);
2503                         continue;
2504                 }
2505
2506                 if (rte_atomic16_cmpset(&(port->port_status),
2507                         RTE_PORT_STOPPED, RTE_PORT_HANDLING) == 0) {
2508                         printf("Port %d is not stopped\n", pi);
2509                         continue;
2510                 }
2511
2512                 if (port->flow_list)
2513                         port_flow_flush(pi);
2514                 rte_eth_dev_close(pi);
2515
2516                 remove_invalid_ports();
2517
2518                 if (rte_atomic16_cmpset(&(port->port_status),
2519                         RTE_PORT_HANDLING, RTE_PORT_CLOSED) == 0)
2520                         printf("Port %d cannot be set to closed\n", pi);
2521         }
2522
2523         printf("Done\n");
2524 }
2525
2526 void
2527 reset_port(portid_t pid)
2528 {
2529         int diag;
2530         portid_t pi;
2531         struct rte_port *port;
2532
2533         if (port_id_is_invalid(pid, ENABLED_WARN))
2534                 return;
2535
2536         if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2537                 (pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2538                 printf("Cannot reset port(s); please stop them first.\n");
2539                 return;
2540         }
2541
2542         printf("Resetting ports...\n");
2543
2544         RTE_ETH_FOREACH_DEV(pi) {
2545                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2546                         continue;
2547
2548                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2549                         printf("Please remove port %d from forwarding "
2550                                "configuration.\n", pi);
2551                         continue;
2552                 }
2553
2554                 if (port_is_bonding_slave(pi)) {
2555                         printf("Please remove port %d from bonded device.\n",
2556                                pi);
2557                         continue;
2558                 }
2559
2560                 diag = rte_eth_dev_reset(pi);
2561                 if (diag == 0) {
2562                         port = &ports[pi];
2563                         port->need_reconfig = 1;
2564                         port->need_reconfig_queues = 1;
2565                 } else {
2566                         printf("Failed to reset port %d. diag=%d\n", pi, diag);
2567                 }
2568         }
2569
2570         printf("Done\n");
2571 }
2572
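/*
 * Attach a port given a devargs identifier, e.g. (illustrative examples)
 * a PCI address such as "0000:03:00.0" or a vdev specification such as
 * "net_tap0,iface=tap0".
 */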
2573 void
2574 attach_port(char *identifier)
2575 {
2576         portid_t pi;
2577         struct rte_dev_iterator iterator;
2578
2579         printf("Attaching a new port...\n");
2580
2581         if (identifier == NULL) {
2582                 printf("Invalid identifier specified\n");
2583                 return;
2584         }
2585
2586         if (rte_dev_probe(identifier) < 0) {
2587                 TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
2588                 return;
2589         }
2590
2591         /* first attach mode: event */
2592         if (setup_on_probe_event) {
2593                 /* new ports are detected on RTE_ETH_EVENT_NEW event */
2594                 for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
2595                         if (ports[pi].port_status == RTE_PORT_HANDLING &&
2596                                         ports[pi].need_setup != 0)
2597                                 setup_attached_port(pi);
2598                 return;
2599         }
2600
2601         /* second attach mode: iterator */
2602         RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
2603                 /* setup ports matching the devargs used for probing */
2604                 if (port_is_forwarding(pi))
2605                         continue; /* port was already attached before */
2606                 setup_attached_port(pi);
2607         }
2608 }
2609
2610 static void
2611 setup_attached_port(portid_t pi)
2612 {
2613         unsigned int socket_id;
2614         int ret;
2615
2616         socket_id = (unsigned)rte_eth_dev_socket_id(pi);
2617         /* if socket_id is invalid, set to the first available socket. */
2618         if (check_socket_id(socket_id) < 0)
2619                 socket_id = socket_ids[0];
2620         reconfig(pi, socket_id);
2621         ret = rte_eth_promiscuous_enable(pi);
2622         if (ret != 0)
2623                 printf("Error enabling promiscuous mode for port %u: %s - ignoring\n",
2624                         pi, rte_strerror(-ret));
2625
2626         ports_ids[nb_ports++] = pi;
2627         fwd_ports_ids[nb_fwd_ports++] = pi;
2628         nb_cfg_ports = nb_fwd_ports;
2629         ports[pi].need_setup = 0;
2630         ports[pi].port_status = RTE_PORT_STOPPED;
2631
2632         printf("Port %d is attached. Number of ports is now %d\n", pi, nb_ports);
2633         printf("Done\n");
2634 }
2635
2636 static void
2637 detach_device(struct rte_device *dev)
2638 {
2639         portid_t sibling;
2640
2641         if (dev == NULL) {
2642                 printf("Device already removed\n");
2643                 return;
2644         }
2645
2646         printf("Removing a device...\n");
2647
2648         if (rte_dev_remove(dev) < 0) {
2649                 TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
2650                 return;
2651         }
2652         RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
2653                 /* reset mapping between old ports and removed device */
2654                 rte_eth_devices[sibling].device = NULL;
2655                 if (ports[sibling].port_status != RTE_PORT_CLOSED) {
2656                         /* sibling ports are forced to be closed */
2657                         ports[sibling].port_status = RTE_PORT_CLOSED;
2658                         printf("Port %u is closed\n", sibling);
2659                 }
2660         }
2661
2662         remove_invalid_ports();
2663
2664         printf("Device is detached\n");
2665         printf("Number of ports is now %d\n", nb_ports);
2666         printf("Done\n");
2668 }
2669
2670 void
2671 detach_port_device(portid_t port_id)
2672 {
2673         if (port_id_is_invalid(port_id, ENABLED_WARN))
2674                 return;
2675
2676         if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2677                 if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2678                         printf("Port %u is not stopped\n", port_id);
2679                         return;
2680                 }
2681                 printf("Port %u was not closed\n", port_id);
2682                 if (ports[port_id].flow_list)
2683                         port_flow_flush(port_id);
2684         }
2685
2686         detach_device(rte_eth_devices[port_id].device);
2687 }
2688
2689 void
2690 detach_devargs(char *identifier)
2691 {
2692         struct rte_dev_iterator iterator;
2693         struct rte_devargs da;
2694         portid_t port_id;
2695
2696         printf("Removing a device...\n");
2697
2698         memset(&da, 0, sizeof(da));
2699         if (rte_devargs_parsef(&da, "%s", identifier)) {
2700                 printf("Cannot parse identifier\n");
2701                 if (da.args)
2702                         free(da.args);
2703                 return;
2704         }
2705
2706         RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
2707                 if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2708                         if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2709                                 printf("Port %u not stopped\n", port_id);
2710                                 rte_eth_iterator_cleanup(&iterator);
2711                                 return;
2712                         }
2713
2714                         /* sibling ports are forced to be closed */
2715                         if (ports[port_id].flow_list)
2716                                 port_flow_flush(port_id);
2717                         ports[port_id].port_status = RTE_PORT_CLOSED;
2718                         printf("Port %u is now closed\n", port_id);
2719                 }
2720         }
2721
2722         if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
2723                 TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
2724                             da.name, da.bus->name);
2725                 return;
2726         }
2727
2728         remove_invalid_ports();
2729
2730         printf("Device %s is detached\n", identifier);
2731         printf("Number of ports is now %d\n", nb_ports);
2732         printf("Done\n");
2733 }
2734
2735 void
2736 pmd_test_exit(void)
2737 {
2738         portid_t pt_id;
2739         int ret;
2740         int i;
2741
2742         if (test_done == 0)
2743                 stop_packet_forwarding();
2744
2745         for (i = 0 ; i < RTE_MAX_NUMA_NODES ; i++) {
2746                 if (mempools[i]) {
2747                         if (mp_alloc_type == MP_ALLOC_ANON)
2748                                 rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
2749                                                      NULL);
2750                 }
2751         }
2752         if (ports != NULL) {
2753                 no_link_check = 1;
2754                 RTE_ETH_FOREACH_DEV(pt_id) {
2755                         printf("\nStopping port %d...\n", pt_id);
2756                         fflush(stdout);
2757                         stop_port(pt_id);
2758                 }
2759                 RTE_ETH_FOREACH_DEV(pt_id) {
2760                         printf("\nShutting down port %d...\n", pt_id);
2761                         fflush(stdout);
2762                         close_port(pt_id);
2763                 }
2764         }
2765
2766         if (hot_plug) {
2767                 ret = rte_dev_event_monitor_stop();
2768                 if (ret) {
2769                         RTE_LOG(ERR, EAL,
2770                                 "Failed to stop the device event monitor.\n");
2771                         return;
2772                 }
2773
2774                 ret = rte_dev_event_callback_unregister(NULL,
2775                         dev_event_callback, NULL);
2776                 if (ret < 0) {
2777                         RTE_LOG(ERR, EAL,
2778                                 "Failed to unregister the device event callback.\n");
2779                         return;
2780                 }
2781
2782                 ret = rte_dev_hotplug_handle_disable();
2783                 if (ret) {
2784                         RTE_LOG(ERR, EAL,
2785                                 "Failed to disable hotplug handling.\n");
2786                         return;
2787                 }
2788         }
2789         for (i = 0 ; i < RTE_MAX_NUMA_NODES ; i++) {
2790                 if (mempools[i])
2791                         rte_mempool_free(mempools[i]);
2792         }
2793
2794         printf("\nBye...\n");
2795 }
2796
2797 typedef void (*cmd_func_t)(void);
2798 struct pmd_test_command {
2799         const char *cmd_name;
2800         cmd_func_t cmd_func;
2801 };
2802
2803 /* Check the link status of all ports for up to 9 s, then print the final state */
2804 static void
2805 check_all_ports_link_status(uint32_t port_mask)
2806 {
2807 #define CHECK_INTERVAL 100 /* 100ms */
2808 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
2809         portid_t portid;
2810         uint8_t count, all_ports_up, print_flag = 0;
2811         struct rte_eth_link link;
2812         int ret;
2813
2814         printf("Checking link statuses...\n");
2815         fflush(stdout);
2816         for (count = 0; count <= MAX_CHECK_TIME; count++) {
2817                 all_ports_up = 1;
2818                 RTE_ETH_FOREACH_DEV(portid) {
2819                         if ((port_mask & (1 << portid)) == 0)
2820                                 continue;
2821                         memset(&link, 0, sizeof(link));
2822                         ret = rte_eth_link_get_nowait(portid, &link);
2823                         if (ret < 0) {
2824                                 all_ports_up = 0;
2825                                 if (print_flag == 1)
2826                                         printf("Port %u link get failed: %s\n",
2827                                                 portid, rte_strerror(-ret));
2828                                 continue;
2829                         }
2830                         /* print link status if flag set */
2831                         if (print_flag == 1) {
2832                                 if (link.link_status)
2833                                         printf(
2834                                         "Port %d Link Up. speed %u Mbps - %s\n",
2835                                         portid, link.link_speed,
2836                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
2837                                         ("full-duplex") : ("half-duplex"));
2838                                 else
2839                                         printf("Port %d Link Down\n", portid);
2840                                 continue;
2841                         }
2842                         /* clear all_ports_up flag if any link down */
2843                         if (link.link_status == ETH_LINK_DOWN) {
2844                                 all_ports_up = 0;
2845                                 break;
2846                         }
2847                 }
2848                 /* after finally printing all link status, get out */
2849                 if (print_flag == 1)
2850                         break;
2851
2852                 if (all_ports_up == 0) {
2853                         fflush(stdout);
2854                         rte_delay_ms(CHECK_INTERVAL);
2855                 }
2856
2857                 /* set the print_flag if all ports up or timeout */
2858                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
2859                         print_flag = 1;
2860                 }
2861
2862                 if (lsc_interrupt)
2863                         break;
2864         }
2865 }
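/*
 * The loop above polls rte_eth_link_get_nowait() every CHECK_INTERVAL ms for
 * at most MAX_CHECK_TIME rounds (90 * 100 ms = 9 s). Early rounds are silent;
 * once every masked port reports up, or the timeout is about to expire,
 * print_flag is set and a final round prints per-port status. With
 * lsc_interrupt enabled the loop exits after one round, since link-state
 * changes will be reported asynchronously. Illustrative call for a single
 * port (not taken from this file): check_all_ports_link_status(1u << portid).
 */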
2866
2867 /*
2868  * This callback removes one port of a device. It is limited in that it
2869  * cannot handle the removal of multiple ports of the same device.
2870  * TODO: the device detach invocation is planned to move from the user
2871  * side into the EAL, with all PMDs freeing port resources on ethdev close.
2872  */
2873 static void
2874 rmv_port_callback(void *arg)
2875 {
2876         int need_to_start = 0;
2877         int org_no_link_check = no_link_check;
2878         portid_t port_id = (intptr_t)arg;
2879         struct rte_device *dev;
2880
2881         RTE_ETH_VALID_PORTID_OR_RET(port_id);
2882
2883         if (!test_done && port_is_forwarding(port_id)) {
2884                 need_to_start = 1;
2885                 stop_packet_forwarding();
2886         }
2887         no_link_check = 1;
2888         stop_port(port_id);
2889         no_link_check = org_no_link_check;
2890
2891         /* Save rte_device pointer before closing ethdev port */
2892         dev = rte_eth_devices[port_id].device;
2893         close_port(port_id);
2894         detach_device(dev); /* might be already removed or have more ports */
2895
2896         if (need_to_start)
2897                 start_packet_forwarding(0);
2898 }
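/*
 * Sequence used above: pause forwarding if the port is in use, stop the port
 * with link checking suppressed (the link is already gone, so polling it
 * would only add delay), close the ethdev, then detach the underlying
 * rte_device. The rte_device pointer is saved before close_port() because
 * closing may release the ethdev entry that holds it.
 */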
2899
2900 /* This function is used by the interrupt thread */
2901 static int
2902 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
2903                   void *ret_param)
2904 {
2905         RTE_SET_USED(param);
2906         RTE_SET_USED(ret_param);
2907
2908         if (type >= RTE_ETH_EVENT_MAX) {
2909                 fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
2910                         port_id, __func__, type);
2911                 fflush(stderr);
2912         } else if (event_print_mask & (UINT32_C(1) << type)) {
2913                 printf("\nPort %" PRIu16 ": %s event\n", port_id,
2914                         eth_event_desc[type]);
2915                 fflush(stdout);
2916         }
2917
2918         switch (type) {
2919         case RTE_ETH_EVENT_NEW:
2920                 ports[port_id].need_setup = 1;
2921                 ports[port_id].port_status = RTE_PORT_HANDLING;
2922                 break;
2923         case RTE_ETH_EVENT_INTR_RMV:
2924                 if (port_id_is_invalid(port_id, DISABLED_WARN))
2925                         break;
2926                 if (rte_eal_alarm_set(100000,
2927                                 rmv_port_callback, (void *)(intptr_t)port_id))
2928                         fprintf(stderr, "Could not set up deferred device removal\n");
2929                 break;
2930         default:
2931                 break;
2932         }
2933         return 0;
2934 }
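/*
 * For RTE_ETH_EVENT_INTR_RMV the port is not detached here: this callback
 * runs in the interrupt thread, so the detach is deferred through a
 * 100000 us (100 ms) EAL alarm that invokes rmv_port_callback() from the
 * alarm context instead.
 */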
2935
2936 static int
2937 register_eth_event_callback(void)
2938 {
2939         int ret;
2940         enum rte_eth_event_type event;
2941
2942         for (event = RTE_ETH_EVENT_UNKNOWN;
2943                         event < RTE_ETH_EVENT_MAX; event++) {
2944                 ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
2945                                 event,
2946                                 eth_event_callback,
2947                                 NULL);
2948                 if (ret != 0) {
2949                         TESTPMD_LOG(ERR, "Failed to register callback for "
2950                                         "%s event\n", eth_event_desc[event]);
2951                         return -1;
2952                 }
2953         }
2954
2955         return 0;
2956 }
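/*
 * Passing RTE_ETH_ALL registers eth_event_callback() on every port slot, so
 * ports attached after startup are covered as well; the event_print_mask
 * check in the callback then decides which event types are printed.
 */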
2957
2958 /* This function is used by the interrupt thread */
2959 static void
2960 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
2961                              __rte_unused void *arg)
2962 {
2963         uint16_t port_id;
2964         int ret;
2965
2966         if (type >= RTE_DEV_EVENT_MAX) {
2967                 fprintf(stderr, "%s called upon invalid event %d\n",
2968                         __func__, type);
2969                 fflush(stderr);
2970         }
2971
2972         switch (type) {
2973         case RTE_DEV_EVENT_REMOVE:
2974                 RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
2975                         device_name);
2976                 ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
2977                 if (ret) {
2978                         RTE_LOG(ERR, EAL, "cannot get port by device name %s!\n",
2979                                 device_name);
2980                         return;
2981                 }
2982                 /*
2983                  * Because the user's callback is invoked from the EAL
2984                  * interrupt callback, the interrupt callback must finish
2985                  * before it can be unregistered while detaching the device.
2986                  * So return from this callback quickly and use a deferred
2987                  * removal to detach the device. This is a workaround: once
2988                  * device detaching moves into the EAL, the deferred removal
2989                  * can be deleted.
2990                  */
2991                 if (rte_eal_alarm_set(100000,
2992                                 rmv_port_callback, (void *)(intptr_t)port_id))
2993                         RTE_LOG(ERR, EAL,
2994                                 "Could not set up deferred device removal\n");
2995                 break;
2996         case RTE_DEV_EVENT_ADD:
2997                 RTE_LOG(DEBUG, EAL, "The device: %s has been added!\n",
2998                         device_name);
2999                 /* TODO: once kernel driver binding is finished,
3000                  * start attaching the port.
3001                  */
3002                 break;
3003         default:
3004                 break;
3005         }
3006 }
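/*
 * Device events carry a device name rather than a port id, hence the
 * rte_eth_dev_get_port_by_name() lookup above. As noted for
 * rmv_port_callback(), only one port per device is handled.
 */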
3007
3008 static int
3009 set_tx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
3010 {
3011         uint16_t i;
3012         int diag;
3013         uint8_t mapping_found = 0;
3014
3015         for (i = 0; i < nb_tx_queue_stats_mappings; i++) {
3016                 if ((tx_queue_stats_mappings[i].port_id == port_id) &&
3017                                 (tx_queue_stats_mappings[i].queue_id < nb_txq)) {
3018                         diag = rte_eth_dev_set_tx_queue_stats_mapping(port_id,
3019                                         tx_queue_stats_mappings[i].queue_id,
3020                                         tx_queue_stats_mappings[i].stats_counter_id);
3021                         if (diag != 0)
3022                                 return diag;
3023                         mapping_found = 1;
3024                 }
3025         }
3026         if (mapping_found)
3027                 port->tx_queue_stats_mapping_enabled = 1;
3028         return 0;
3029 }
3030
3031 static int
3032 set_rx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
3033 {
3034         uint16_t i;
3035         int diag;
3036         uint8_t mapping_found = 0;
3037
3038         for (i = 0; i < nb_rx_queue_stats_mappings; i++) {
3039                 if ((rx_queue_stats_mappings[i].port_id == port_id) &&
3040                                 (rx_queue_stats_mappings[i].queue_id < nb_rxq)) {
3041                         diag = rte_eth_dev_set_rx_queue_stats_mapping(port_id,
3042                                         rx_queue_stats_mappings[i].queue_id,
3043                                         rx_queue_stats_mappings[i].stats_counter_id);
3044                         if (diag != 0)
3045                                 return diag;
3046                         mapping_found = 1;
3047                 }
3048         }
3049         if (mapping_found)
3050                 port->rx_queue_stats_mapping_enabled = 1;
3051         return 0;
3052 }
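/*
 * Both helpers above walk the user-supplied (port, queue) -> counter tables
 * and program every entry whose port matches and whose queue id is in range.
 * A minimal sketch of what a single entry configures, assuming queue 0 of
 * port 0 is mapped to stats counter 5:
 *
 *     rte_eth_dev_set_rx_queue_stats_mapping(0, 0, 5);
 *
 * after which the per-queue counters (q_ipackets etc.) at index 5 reflect
 * that queue's traffic, on PMDs that support the mapping.
 */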
3053
3054 static void
3055 map_port_queue_stats_mapping_registers(portid_t pi, struct rte_port *port)
3056 {
3057         int diag = 0;
3058
3059         diag = set_tx_queue_stats_mapping_registers(pi, port);
3060         if (diag != 0) {
3061                 if (diag == -ENOTSUP) {
3062                         port->tx_queue_stats_mapping_enabled = 0;
3063                         printf("TX queue stats mapping not supported on port id=%d\n", pi);
3064                 }
3065                 else
3066                         rte_exit(EXIT_FAILURE,
3067                                         "set_tx_queue_stats_mapping_registers "
3068                                         "failed for port id=%d diag=%d\n",
3069                                         pi, diag);
3070         }
3071
3072         diag = set_rx_queue_stats_mapping_registers(pi, port);
3073         if (diag != 0) {
3074                 if (diag == -ENOTSUP) {
3075                         port->rx_queue_stats_mapping_enabled = 0;
3076                         printf("RX queue stats mapping not supported on port id=%d\n", pi);
3077                 }
3078                 else
3079                         rte_exit(EXIT_FAILURE,
3080                                         "set_rx_queue_stats_mapping_registers "
3081                                         "failed for port id=%d diag=%d\n",
3082                                         pi, diag);
3083         }
3084 }
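/*
 * -ENOTSUP from the PMD is tolerated above (the mapping is simply marked
 * disabled for the port); any other non-zero return is fatal, since it
 * indicates a bad mapping rather than a missing device capability.
 */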
3085
3086 static void
3087 rxtx_port_config(struct rte_port *port)
3088 {
3089         uint16_t qid;
3090         uint64_t offloads;
3091
3092         for (qid = 0; qid < nb_rxq; qid++) {
3093                 offloads = port->rx_conf[qid].offloads;
3094                 port->rx_conf[qid] = port->dev_info.default_rxconf;
3095                 if (offloads != 0)
3096                         port->rx_conf[qid].offloads = offloads;
3097
3098                 /* Check if any Rx parameters have been passed */
3099                 if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3100                         port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3101
3102                 if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3103                         port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3104
3105                 if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3106                         port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3107
3108                 if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3109                         port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3110
3111                 if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3112                         port->rx_conf[qid].rx_drop_en = rx_drop_en;
3113
3114                 port->nb_rx_desc[qid] = nb_rxd;
3115         }
3116
3117         for (qid = 0; qid < nb_txq; qid++) {
3118                 offloads = port->tx_conf[qid].offloads;
3119                 port->tx_conf[qid] = port->dev_info.default_txconf;
3120                 if (offloads != 0)
3121                         port->tx_conf[qid].offloads = offloads;
3122
3123                 /* Check if any Tx parameters have been passed */
3124                 if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3125                         port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3126
3127                 if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3128                         port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3129
3130                 if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3131                         port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3132
3133                 if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3134                         port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3135
3136                 if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3137                         port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3138
3139                 port->nb_tx_desc[qid] = nb_txd;
3140         }
3141 }
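/*
 * Each queue starts from the PMD's default_rxconf/default_txconf, and only
 * values the user actually supplied override the defaults
 * (RTE_PMD_PARAM_UNSET marks "not given"). For example, assuming the usual
 * testpmd option name, --rxfreet=32 would arrive here as rx_free_thresh = 32
 * and be applied to every Rx queue of every port.
 */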
3142
3143 void
3144 init_port_config(void)
3145 {
3146         portid_t pid;
3147         struct rte_port *port;
3148         int ret;
3149
3150         RTE_ETH_FOREACH_DEV(pid) {
3151                 port = &ports[pid];
3152                 port->dev_conf.fdir_conf = fdir_conf;
3153
3154                 ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3155                 if (ret != 0)
3156                         return;
3157
3158                 if (nb_rxq > 1) {
3159                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3160                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3161                                 rss_hf & port->dev_info.flow_type_rss_offloads;
3162                 } else {
3163                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3164                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3165                 }
3166
3167                 if (port->dcb_flag == 0) {
3168                         if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
3169                                 port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
3170                         else
3171                                 port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3172                 }
3173
3174                 rxtx_port_config(port);
3175
3176                 ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3177                 if (ret != 0)
3178                         return;
3179
3180                 map_port_queue_stats_mapping_registers(pid, port);
3181 #if defined RTE_LIBRTE_IXGBE_PMD && defined RTE_LIBRTE_IXGBE_BYPASS
3182                 rte_pmd_ixgbe_bypass_init(pid);
3183 #endif
3184
3185                 if (lsc_interrupt &&
3186                     (rte_eth_devices[pid].data->dev_flags &
3187                      RTE_ETH_DEV_INTR_LSC))
3188                         port->dev_conf.intr_conf.lsc = 1;
3189                 if (rmv_interrupt &&
3190                     (rte_eth_devices[pid].data->dev_flags &
3191                      RTE_ETH_DEV_INTR_RMV))
3192                         port->dev_conf.intr_conf.rmv = 1;
3193         }
3194 }
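/*
 * RSS is requested above only when more than one Rx queue is configured, and
 * the requested hash types are intersected with the PMD's
 * flow_type_rss_offloads, so unsupported types are never asked for.
 */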
3195
3196 void set_port_slave_flag(portid_t slave_pid)
3197 {
3198         struct rte_port *port;
3199
3200         port = &ports[slave_pid];
3201         port->slave_flag = 1;
3202 }
3203
3204 void clear_port_slave_flag(portid_t slave_pid)
3205 {
3206         struct rte_port *port;
3207
3208         port = &ports[slave_pid];
3209         port->slave_flag = 0;
3210 }
3211
3212 uint8_t port_is_bonding_slave(portid_t slave_pid)
3213 {
3214         struct rte_port *port;
3215
3216         port = &ports[slave_pid];
3217         if ((rte_eth_devices[slave_pid].data->dev_flags &
3218             RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3219                 return 1;
3220         return 0;
3221 }
3222
3223 const uint16_t vlan_tags[] = {
3224                 0,  1,  2,  3,  4,  5,  6,  7,
3225                 8,  9, 10, 11,  12, 13, 14, 15,
3226                 16, 17, 18, 19, 20, 21, 22, 23,
3227                 24, 25, 26, 27, 28, 29, 30, 31
3228 };
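/*
 * These VLAN tags feed the VMDq+DCB pool mapping below: only the first
 * nb_queue_pools tags are programmed, tag i being steered to pool
 * (i % nb_queue_pools), so each configured pool receives exactly one tag.
 */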
3229
3230 static int
3231 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3232                  enum dcb_mode_enable dcb_mode,
3233                  enum rte_eth_nb_tcs num_tcs,
3234                  uint8_t pfc_en)
3235 {
3236         uint8_t i;
3237         int32_t rc;
3238         struct rte_eth_rss_conf rss_conf;
3239
3240         /*
3241          * Builds up the correct configuration for dcb+vt based on the vlan tags array
3242          * given above, and the number of traffic classes available for use.
3243          */
3244         if (dcb_mode == DCB_VT_ENABLED) {
3245                 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3246                                 &eth_conf->rx_adv_conf.vmdq_dcb_conf;
3247                 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3248                                 &eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3249
3250                 /* VMDQ+DCB RX and TX configurations */
3251                 vmdq_rx_conf->enable_default_pool = 0;
3252                 vmdq_rx_conf->default_pool = 0;
3253                 vmdq_rx_conf->nb_queue_pools =
3254                         (num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3255                 vmdq_tx_conf->nb_queue_pools =
3256                         (num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3257
3258                 vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3259                 for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3260                         vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3261                         vmdq_rx_conf->pool_map[i].pools =
3262                                 1 << (i % vmdq_rx_conf->nb_queue_pools);
3263                 }
3264                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3265                         vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3266                         vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3267                 }
3268
3269                 /* set DCB mode of RX and TX of multiple queues */
3270                 eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB;
3271                 eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3272         } else {
3273                 struct rte_eth_dcb_rx_conf *rx_conf =
3274                                 &eth_conf->rx_adv_conf.dcb_rx_conf;
3275                 struct rte_eth_dcb_tx_conf *tx_conf =
3276                                 &eth_conf->tx_adv_conf.dcb_tx_conf;
3277
3278                 rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3279                 if (rc != 0)
3280                         return rc;
3281
3282                 rx_conf->nb_tcs = num_tcs;
3283                 tx_conf->nb_tcs = num_tcs;
3284
3285                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3286                         rx_conf->dcb_tc[i] = i % num_tcs;
3287                         tx_conf->dcb_tc[i] = i % num_tcs;
3288                 }
3289
3290                 eth_conf->rxmode.mq_mode = ETH_MQ_RX_DCB_RSS;
3291                 eth_conf->rx_adv_conf.rss_conf = rss_conf;
3292                 eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3293         }
3294
3295         if (pfc_en)
3296                 eth_conf->dcb_capability_en =
3297                                 ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3298         else
3299                 eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3300
3301         return 0;
3302 }
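/*
 * Worked example of the priority-to-TC mapping above: with
 * num_tcs == ETH_4_TCS, dcb_tc[i] = i % num_tcs maps user priorities 0..7 to
 * traffic classes 0,1,2,3,0,1,2,3; with ETH_8_TCS the mapping is the
 * identity.
 */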
3303
3304 int
3305 init_port_dcb_config(portid_t pid,
3306                      enum dcb_mode_enable dcb_mode,
3307                      enum rte_eth_nb_tcs num_tcs,
3308                      uint8_t pfc_en)
3309 {
3310         struct rte_eth_conf port_conf;
3311         struct rte_port *rte_port;
3312         int retval;
3313         uint16_t i;
3314
3315         rte_port = &ports[pid];
3316
3317         memset(&port_conf, 0, sizeof(struct rte_eth_conf));
3318         /* Enter DCB configuration status */
3319         dcb_config = 1;
3320
3321         port_conf.rxmode = rte_port->dev_conf.rxmode;
3322         port_conf.txmode = rte_port->dev_conf.txmode;
3323
3324         /* Set configuration of DCB in VT mode and DCB in non-VT mode. */
3325         retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3326         if (retval < 0)
3327                 return retval;
3328         port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3329
3330         /* Re-configure the device. */
3331         retval = rte_eth_dev_configure(pid, nb_rxq, nb_txq, &port_conf);
3332         if (retval < 0)
3333                 return retval;
3334
3335         retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3336         if (retval != 0)
3337                 return retval;
3338
3339         /* If dev_info.vmdq_pool_base is greater than 0,
3340          * the queue ids of the vmdq pools start after the pf queues.
3341          */
3342         if (dcb_mode == DCB_VT_ENABLED &&
3343             rte_port->dev_info.vmdq_pool_base > 0) {
3344                 printf("VMDQ_DCB multi-queue mode is nonsensical"
3345                         " for port %d.\n", pid);
3346                 return -1;
3347         }
3348
3349         /* Assume the ports in testpmd have the same dcb capability
3350          * and have the same number of rxq and txq in dcb mode
3351          */
3352         if (dcb_mode == DCB_VT_ENABLED) {
3353                 if (rte_port->dev_info.max_vfs > 0) {
3354                         nb_rxq = rte_port->dev_info.nb_rx_queues;
3355                         nb_txq = rte_port->dev_info.nb_tx_queues;
3356                 } else {
3357                         nb_rxq = rte_port->dev_info.max_rx_queues;
3358                         nb_txq = rte_port->dev_info.max_tx_queues;
3359                 }
3360         } else {
3361                 /* If VT is disabled, use all pf queues. */
3362                 if (rte_port->dev_info.vmdq_pool_base == 0) {
3363                         nb_rxq = rte_port->dev_info.max_rx_queues;
3364                         nb_txq = rte_port->dev_info.max_tx_queues;
3365                 } else {
3366                         nb_rxq = (queueid_t)num_tcs;
3367                         nb_txq = (queueid_t)num_tcs;
3368
3369                 }
3370         }
3371         rx_free_thresh = 64;
3372
3373         memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3374
3375         rxtx_port_config(rte_port);
3376         /* VLAN filter */
3377         rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3378         for (i = 0; i < RTE_DIM(vlan_tags); i++)
3379                 rx_vft_set(pid, vlan_tags[i], 1);
3380
3381         retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3382         if (retval != 0)
3383                 return retval;
3384
3385         map_port_queue_stats_mapping_registers(pid, rte_port);
3386
3387         rte_port->dcb_flag = 1;
3388
3389         return 0;
3390 }
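/*
 * Note that init_port_dcb_config() resizes the global nb_rxq/nb_txq to match
 * the DCB layout and re-runs rxtx_port_config(), and the hard-coded
 * rx_free_thresh of 64 then overrides any value given on the command line.
 */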
3391
3392 static void
3393 init_port(void)
3394 {
3395         /* Configuration of Ethernet ports. */
3396         ports = rte_zmalloc("testpmd: ports",
3397                             sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3398                             RTE_CACHE_LINE_SIZE);
3399         if (ports == NULL) {
3400                 rte_exit(EXIT_FAILURE,
3401                                 "rte_zmalloc(%d struct rte_port) failed\n",
3402                                 RTE_MAX_ETHPORTS);
3403         }
3404
3405         /* Initialize ports NUMA structures */
3406         memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3407         memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3408         memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3409 }
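/*
 * port_numa, rxring_numa and txring_numa are byte arrays indexed by port id,
 * so one memset() per array marks every port as NUMA_NO_CONFIG until the
 * command line says otherwise.
 */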
3410
3411 static void
3412 force_quit(void)
3413 {
3414         pmd_test_exit();
3415         prompt_exit();
3416 }
3417
3418 static void
3419 print_stats(void)
3420 {
3421         uint8_t i;
3422         const char clr[] = { 27, '[', '2', 'J', '\0' };
3423         const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3424
3425         /* Clear screen and move to top left */
3426         printf("%s%s", clr, top_left);
3427
3428         printf("\nPort statistics ====================================");
3429         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3430                 nic_stats_display(fwd_ports_ids[i]);
3431
3432         fflush(stdout);
3433 }
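/*
 * clr and top_left above are the ANSI escape sequences ESC [2J (clear
 * screen) and ESC [1;1H (cursor to row 1, column 1), spelled out as char
 * arrays because 27 is the ESC byte.
 */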
3434
3435 static void
3436 signal_handler(int signum)
3437 {
3438         if (signum == SIGINT || signum == SIGTERM) {
3439                 printf("\nSignal %d received, preparing to exit...\n",
3440                                 signum);
3441 #ifdef RTE_LIBRTE_PDUMP
3442                 /* uninitialize packet capture framework */
3443                 rte_pdump_uninit();
3444 #endif
3445 #ifdef RTE_LIBRTE_LATENCY_STATS
3446                 if (latencystats_enabled != 0)
3447                         rte_latencystats_uninit();
3448 #endif
3449                 force_quit();
3450                 /* Set flag to indicate forced termination. */
3451                 f_quit = 1;
3452                 /* exit with the expected status */
3453                 signal(signum, SIG_DFL);
3454                 kill(getpid(), signum);
3455         }
3456 }
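/*
 * After cleanup the handler re-raises the signal with the default
 * disposition (signal(signum, SIG_DFL) followed by kill()), so the process
 * exit status reports death by signal, as callers such as shells expect.
 */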
3457
3458 int
3459 main(int argc, char** argv)
3460 {
3461         int diag;
3462         portid_t port_id;
3463         uint16_t count;
3464         int ret;
3465
3466         signal(SIGINT, signal_handler);
3467         signal(SIGTERM, signal_handler);
3468
3469         testpmd_logtype = rte_log_register("testpmd");
3470         if (testpmd_logtype < 0)
3471                 rte_exit(EXIT_FAILURE, "Cannot register log type");
3472         rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
3473
3474         diag = rte_eal_init(argc, argv);
3475         if (diag < 0)
3476                 rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
3477                          rte_strerror(rte_errno));
3478
3479         if (rte_eal_process_type() == RTE_PROC_SECONDARY)
3480                 rte_exit(EXIT_FAILURE,
3481                          "Secondary process type not supported.\n");
3482
3483         ret = register_eth_event_callback();
3484         if (ret != 0)
3485                 rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
3486
3487 #ifdef RTE_LIBRTE_PDUMP
3488         /* initialize packet capture framework */
3489         rte_pdump_init();
3490 #endif
3491
3492         count = 0;
3493         RTE_ETH_FOREACH_DEV(port_id) {
3494                 ports_ids[count] = port_id;
3495                 count++;
3496         }
3497         nb_ports = (portid_t) count;
3498         if (nb_ports == 0)
3499                 TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
3500
3501         /* allocate port structures, and init them */
3502         init_port();
3503
3504         set_def_fwd_config();
3505         if (nb_lcores == 0)
3506                 rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
3507                          "Check the core mask argument\n");
3508
3509         /* Bitrate/latency stats disabled by default */
3510 #ifdef RTE_LIBRTE_BITRATE
3511         bitrate_enabled = 0;
3512 #endif
3513 #ifdef RTE_LIBRTE_LATENCY_STATS
3514         latencystats_enabled = 0;
3515 #endif
3516
3517         /* on FreeBSD, mlockall() is disabled by default */
3518 #ifdef RTE_EXEC_ENV_FREEBSD
3519         do_mlockall = 0;
3520 #else
3521         do_mlockall = 1;
3522 #endif
3523
3524         argc -= diag;
3525         argv += diag;
3526         if (argc > 1)
3527                 launch_args_parse(argc, argv);
3528
3529         if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
3530                 TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
3531                         strerror(errno));
3532         }
3533
3534         if (tx_first && interactive)
3535                 rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
3536                                 "interactive mode.\n");
3537
3538         if (tx_first && lsc_interrupt) {
3539                 printf("Warning: lsc_interrupt needs to be off when "
3540                                 "using tx_first. Disabling.\n");
3541                 lsc_interrupt = 0;
3542         }
3543
3544         if (!nb_rxq && !nb_txq)
3545                 printf("Warning: Either the rx or tx queue count should be non-zero\n");
3546
3547         if (nb_rxq > 1 && nb_rxq > nb_txq)
3548                 printf("Warning: nb_rxq=%d enables RSS configuration, "
3549                        "but nb_txq=%d will prevent it from being fully tested.\n",
3550                        nb_rxq, nb_txq);
3551
3552         init_config();
3553
3554         if (hot_plug) {
3555                 ret = rte_dev_hotplug_handle_enable();
3556                 if (ret) {
3557                         RTE_LOG(ERR, EAL,
3558                                 "fail to enable hotplug handling.\n");
3559                         return -1;
3560                 }
3561
3562                 ret = rte_dev_event_monitor_start();
3563                 if (ret) {
3564                         RTE_LOG(ERR, EAL,
3565                                 "fail to start device event monitoring.\n");
3566                         return -1;
3567                 }
3568
3569                 ret = rte_dev_event_callback_register(NULL,
3570                         dev_event_callback, NULL);
3571                 if (ret) {
3572                         RTE_LOG(ERR, EAL,
3573                                 "fail to register device event callback\n");
3574                         return -1;
3575                 }
3576         }
3577
3578         if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
3579                 rte_exit(EXIT_FAILURE, "Start ports failed\n");
3580
3581         /* set all ports to promiscuous mode by default */
3582         RTE_ETH_FOREACH_DEV(port_id) {
3583                 ret = rte_eth_promiscuous_enable(port_id);
3584                 if (ret != 0)
3585                         printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
3586                                 port_id, rte_strerror(-ret));
3587         }
3588
3589         /* Init metrics library */
3590         rte_metrics_init(rte_socket_id());
3591
3592 #ifdef RTE_LIBRTE_LATENCY_STATS
3593         if (latencystats_enabled != 0) {
3594                 int ret = rte_latencystats_init(1, NULL);
3595                 if (ret)
3596                         printf("Warning: latencystats init()"
3597                                 " returned error %d\n", ret);
3598                 printf("Latencystats running on lcore %d\n",
3599                         latencystats_lcore_id);
3600         }
3601 #endif
3602
3603         /* Setup bitrate stats */
3604 #ifdef RTE_LIBRTE_BITRATE
3605         if (bitrate_enabled != 0) {
3606                 bitrate_data = rte_stats_bitrate_create();
3607                 if (bitrate_data == NULL)
3608                         rte_exit(EXIT_FAILURE,
3609                                 "Could not allocate bitrate data.\n");
3610                 rte_stats_bitrate_reg(bitrate_data);
3611         }
3612 #endif
3613
3614 #ifdef RTE_LIBRTE_CMDLINE
3615         if (strlen(cmdline_filename) != 0)
3616                 cmdline_read_from_file(cmdline_filename);
3617
3618         if (interactive == 1) {
3619                 if (auto_start) {
3620                         printf("Start automatic packet forwarding\n");
3621                         start_packet_forwarding(0);
3622                 }
3623                 prompt();
3624                 pmd_test_exit();
3625         } else
3626 #endif
3627         {
3628                 char c;
3629                 int rc;
3630
3631                 f_quit = 0;
3632
3633                 printf("No interactive command line requested; starting packet forwarding\n");
3634                 start_packet_forwarding(tx_first);
3635                 if (stats_period != 0) {
3636                         uint64_t prev_time = 0, cur_time, diff_time = 0;
3637                         uint64_t timer_period;
3638
3639                         /* Convert to number of cycles */
3640                         timer_period = stats_period * rte_get_timer_hz();
3641
3642                         while (f_quit == 0) {
3643                                 cur_time = rte_get_timer_cycles();
3644                                 diff_time += cur_time - prev_time;
3645
3646                                 if (diff_time >= timer_period) {
3647                                         print_stats();
3648                                         /* Reset the timer */
3649                                         diff_time = 0;
3650                                 }
3651                                 /* Sleep to avoid unnecessary checks */
3652                                 prev_time = cur_time;
3653                                 sleep(1);
3654                         }
3655                 }
3656
3657                 printf("Press enter to exit\n");
3658                 rc = read(0, &c, 1);
3659                 pmd_test_exit();
3660                 if (rc < 0)
3661                         return 1;
3662         }
3663
3664         ret = rte_eal_cleanup();
3665         if (ret != 0)
3666                 rte_exit(EXIT_FAILURE,
3667                          "EAL cleanup failed: %s\n", strerror(-ret));
3668
3669         return EXIT_SUCCESS;
3670 }
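/*
 * A minimal invocation sketch exercising the hot-unplug path handled above
 * (the application option name is assumed from testpmd's usual command line
 * and does not appear in this file):
 *
 *     ./testpmd -l 0-3 -n 4 -- -i --hot-plug
 *
 * EAL arguments precede the "--" separator and application arguments follow
 * it; --hot-plug sets hot_plug, enabling the device event monitor that
 * pmd_test_exit() later tears down.
 */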