app/testpmd: reduce memory consumption
[dpdk.git] / app / test-pmd / testpmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 #include <errno.h>
15 #include <stdbool.h>
16
17 #include <sys/queue.h>
18 #include <sys/stat.h>
19
20 #include <stdint.h>
21 #include <unistd.h>
22 #include <inttypes.h>
23
24 #include <rte_common.h>
25 #include <rte_errno.h>
26 #include <rte_byteorder.h>
27 #include <rte_log.h>
28 #include <rte_debug.h>
29 #include <rte_cycles.h>
30 #include <rte_memory.h>
31 #include <rte_memcpy.h>
32 #include <rte_launch.h>
33 #include <rte_eal.h>
34 #include <rte_alarm.h>
35 #include <rte_per_lcore.h>
36 #include <rte_lcore.h>
37 #include <rte_atomic.h>
38 #include <rte_branch_prediction.h>
39 #include <rte_mempool.h>
40 #include <rte_malloc.h>
41 #include <rte_mbuf.h>
42 #include <rte_mbuf_pool_ops.h>
43 #include <rte_interrupts.h>
44 #include <rte_pci.h>
45 #include <rte_ether.h>
46 #include <rte_ethdev.h>
47 #include <rte_dev.h>
48 #include <rte_string_fns.h>
49 #ifdef RTE_LIBRTE_IXGBE_PMD
50 #include <rte_pmd_ixgbe.h>
51 #endif
52 #ifdef RTE_LIBRTE_PDUMP
53 #include <rte_pdump.h>
54 #endif
55 #include <rte_flow.h>
56 #include <rte_metrics.h>
57 #ifdef RTE_LIBRTE_BITRATE
58 #include <rte_bitrate.h>
59 #endif
60 #ifdef RTE_LIBRTE_LATENCY_STATS
61 #include <rte_latencystats.h>
62 #endif
63
64 #include "testpmd.h"
65
66 #ifndef MAP_HUGETLB
67 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
68 #define HUGE_FLAG (0x40000)
69 #else
70 #define HUGE_FLAG MAP_HUGETLB
71 #endif
72
73 #ifndef MAP_HUGE_SHIFT
74 /* older kernels (or FreeBSD) will not have this define */
75 #define HUGE_SHIFT (26)
76 #else
77 #define HUGE_SHIFT MAP_HUGE_SHIFT
78 #endif
79
80 #define EXTMEM_HEAP_NAME "extmem"
81
82 uint16_t verbose_level = 0; /**< Silent by default. */
83 int testpmd_logtype; /**< Log type for testpmd logs */
84
85 /* use master core for command line ? */
86 uint8_t interactive = 0;
87 uint8_t auto_start = 0;
88 uint8_t tx_first;
89 char cmdline_filename[PATH_MAX] = {0};
90
91 /*
92  * NUMA support configuration.
93  * When set, the NUMA support attempts to dispatch the allocation of the
94  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
95  * probed ports among the CPU sockets 0 and 1.
96  * Otherwise, all memory is allocated from CPU socket 0.
97  */
98 uint8_t numa_support = 1; /**< numa enabled by default */
99
100 /*
101  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
102  * not configured.
103  */
104 uint8_t socket_num = UMA_NO_CONFIG;
105
106 /*
107  * Select mempool allocation type:
108  * - native: use regular DPDK memory
109  * - anon: use regular DPDK memory to create mempool, but populate using
110  *         anonymous memory (may not be IOVA-contiguous)
111  * - xmem: use externally allocated hugepage memory
112  */
113 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
114
115 /*
116  * Store specified sockets on which memory pool to be used by ports
117  * is allocated.
118  */
119 uint8_t port_numa[RTE_MAX_ETHPORTS];
120
121 /*
122  * Store specified sockets on which RX ring to be used by ports
123  * is allocated.
124  */
125 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
126
127 /*
128  * Store specified sockets on which TX ring to be used by ports
129  * is allocated.
130  */
131 uint8_t txring_numa[RTE_MAX_ETHPORTS];
132
133 /*
134  * Record the Ethernet address of peer target ports to which packets are
135  * forwarded.
136  * Must be instantiated with the ethernet addresses of peer traffic generator
137  * ports.
138  */
139 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
140 portid_t nb_peer_eth_addrs = 0;
141
142 /*
143  * Probed Target Environment.
144  */
145 struct rte_port *ports;        /**< For all probed ethernet ports. */
146 portid_t nb_ports;             /**< Number of probed ethernet ports. */
147 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
148 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
149
150 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
151
152 /*
153  * Test Forwarding Configuration.
154  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
155  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
156  */
157 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
158 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
159 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
160 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
161
162 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
163 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
164
165 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
166 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
167
168 /*
169  * Forwarding engines.
170  */
171 struct fwd_engine * fwd_engines[] = {
172         &io_fwd_engine,
173         &mac_fwd_engine,
174         &mac_swap_engine,
175         &flow_gen_engine,
176         &rx_only_engine,
177         &tx_only_engine,
178         &csum_fwd_engine,
179         &icmp_echo_engine,
180         &noisy_vnf_engine,
181 #if defined RTE_LIBRTE_PMD_SOFTNIC
182         &softnic_fwd_engine,
183 #endif
184 #ifdef RTE_LIBRTE_IEEE1588
185         &ieee1588_fwd_engine,
186 #endif
187         NULL,
188 };
189
190 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES];
191 uint16_t mempool_flags;
192
193 struct fwd_config cur_fwd_config;
194 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
195 uint32_t retry_enabled;
196 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
197 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
198
199 uint16_t mbuf_data_size = DEFAULT_MBUF_DATA_SIZE; /**< Mbuf data space size. */
200 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
201                                       * specified on command-line. */
202 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
203
204 /*
205  * In a container, a process running with the 'stats-period' option cannot
206  * be terminated. Set this flag to exit the stats period loop once SIGINT/SIGTERM is received.
207  */
208 uint8_t f_quit;
209
210 /*
211  * Configuration of packet segments used by the "txonly" processing engine.
212  */
213 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
214 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
215         TXONLY_DEF_PACKET_LEN,
216 };
217 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
218
219 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
220 /**< Split policy for packets to TX. */
221
222 uint8_t txonly_multi_flow;
223 /**< Whether multiple flows are generated in TXONLY mode. */
224
225 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
226 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
227
228 /* Whether the current configuration is in DCB mode; 0 means it is not. */
229 uint8_t dcb_config = 0;
230
231 /* Whether the dcb is in testing status */
232 uint8_t dcb_test = 0;
233
234 /*
235  * Configurable number of RX/TX queues.
236  */
237 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
238 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
239 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
240
241 /*
242  * Configurable number of RX/TX ring descriptors.
243  * Defaults are supplied by drivers via ethdev.
244  */
245 #define RTE_TEST_RX_DESC_DEFAULT 0
246 #define RTE_TEST_TX_DESC_DEFAULT 0
247 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
248 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
249
250 #define RTE_PMD_PARAM_UNSET -1
251 /*
252  * Configurable values of RX and TX ring threshold registers.
253  */
254
255 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
256 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
257 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
258
259 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
260 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
261 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
262
263 /*
264  * Configurable value of RX free threshold.
265  */
266 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
267
268 /*
269  * Configurable value of RX drop enable.
270  */
271 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
272
273 /*
274  * Configurable value of TX free threshold.
275  */
276 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
277
278 /*
279  * Configurable value of TX RS bit threshold.
280  */
281 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
282
283 /*
284  * Configurable value of buffered packets before sending.
285  */
286 uint16_t noisy_tx_sw_bufsz;
287
288 /*
289  * Configurable value of packet buffer timeout.
290  */
291 uint16_t noisy_tx_sw_buf_flush_time;
292
293 /*
294  * Configurable value for size of VNF internal memory area
295  * used for simulating noisy neighbour behaviour
296  */
297 uint64_t noisy_lkup_mem_sz;
298
299 /*
300  * Configurable value of number of random writes done in
301  * VNF simulation memory area.
302  */
303 uint64_t noisy_lkup_num_writes;
304
305 /*
306  * Configurable value of number of random reads done in
307  * VNF simulation memory area.
308  */
309 uint64_t noisy_lkup_num_reads;
310
311 /*
312  * Configurable value of number of random reads/writes done in
313  * VNF simulation memory area.
314  */
315 uint64_t noisy_lkup_num_reads_writes;
316
317 /*
318  * Receive Side Scaling (RSS) configuration.
319  */
320 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
321
322 /*
323  * Port topology configuration
324  */
325 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
326
327 /*
328  * Avoid flushing all the RX streams before forwarding starts.
329  */
330 uint8_t no_flush_rx = 0; /* flush by default */
331
332 /*
333  * Flow API isolated mode.
334  */
335 uint8_t flow_isolate_all;
336
337 /*
338  * Avoid checking link status when starting/stopping a port.
339  */
340 uint8_t no_link_check = 0; /* check by default */
341
342 /*
343  * Don't automatically start all ports in interactive mode.
344  */
345 uint8_t no_device_start = 0;
346
347 /*
348  * Enable link status change notification
349  */
350 uint8_t lsc_interrupt = 1; /* enabled by default */
351
352 /*
353  * Enable device removal notification.
354  */
355 uint8_t rmv_interrupt = 1; /* enabled by default */
356
357 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
358
359 /* After attach, port setup is called on event or by iterator */
360 bool setup_on_probe_event = true;
361
362 /* Clear ptypes on port initialization. */
363 uint8_t clear_ptypes = true;
364
365 /* Pretty printing of ethdev events */
366 static const char * const eth_event_desc[] = {
367         [RTE_ETH_EVENT_UNKNOWN] = "unknown",
368         [RTE_ETH_EVENT_INTR_LSC] = "link state change",
369         [RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
370         [RTE_ETH_EVENT_INTR_RESET] = "reset",
371         [RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
372         [RTE_ETH_EVENT_IPSEC] = "IPsec",
373         [RTE_ETH_EVENT_MACSEC] = "MACsec",
374         [RTE_ETH_EVENT_INTR_RMV] = "device removal",
375         [RTE_ETH_EVENT_NEW] = "device probed",
376         [RTE_ETH_EVENT_DESTROY] = "device released",
377         [RTE_ETH_EVENT_MAX] = NULL,
378 };
379
380 /*
381  * Display or mask ether events
382  * Default to all events except VF_MBOX
383  */
384 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
385                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
386                             (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
387                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
388                             (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
389                             (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
390                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV);
391 /*
392  * Decide if all memory are locked for performance.
393  */
394 int do_mlockall = 0;
395
396 /*
397  * NIC bypass mode configuration options.
398  */
399
400 #if defined RTE_LIBRTE_IXGBE_PMD && defined RTE_LIBRTE_IXGBE_BYPASS
401 /* The NIC bypass watchdog timeout. */
402 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
403 #endif
404
405
406 #ifdef RTE_LIBRTE_LATENCY_STATS
407
408 /*
409  * Set when latency stats is enabled in the commandline
410  */
411 uint8_t latencystats_enabled;
412
413 /*
414  * Lcore ID to serve latency statistics.
415  */
416 lcoreid_t latencystats_lcore_id = -1;
417
418 #endif
419
420 /*
421  * Ethernet device configuration.
422  */
423 struct rte_eth_rxmode rx_mode = {
424         .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
425                 /**< Default maximum frame length. */
426 };
427
428 struct rte_eth_txmode tx_mode = {
429         .offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
430 };
431
432 struct rte_fdir_conf fdir_conf = {
433         .mode = RTE_FDIR_MODE_NONE,
434         .pballoc = RTE_FDIR_PBALLOC_64K,
435         .status = RTE_FDIR_REPORT_STATUS,
436         .mask = {
437                 .vlan_tci_mask = 0xFFEF,
438                 .ipv4_mask     = {
439                         .src_ip = 0xFFFFFFFF,
440                         .dst_ip = 0xFFFFFFFF,
441                 },
442                 .ipv6_mask     = {
443                         .src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
444                         .dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
445                 },
446                 .src_port_mask = 0xFFFF,
447                 .dst_port_mask = 0xFFFF,
448                 .mac_addr_byte_mask = 0xFF,
449                 .tunnel_type_mask = 1,
450                 .tunnel_id_mask = 0xFFFFFFFF,
451         },
452         .drop_queue = 127,
453 };
454
455 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
456
457 struct queue_stats_mappings tx_queue_stats_mappings_array[MAX_TX_QUEUE_STATS_MAPPINGS];
458 struct queue_stats_mappings rx_queue_stats_mappings_array[MAX_RX_QUEUE_STATS_MAPPINGS];
459
460 struct queue_stats_mappings *tx_queue_stats_mappings = tx_queue_stats_mappings_array;
461 struct queue_stats_mappings *rx_queue_stats_mappings = rx_queue_stats_mappings_array;
462
463 uint16_t nb_tx_queue_stats_mappings = 0;
464 uint16_t nb_rx_queue_stats_mappings = 0;
465
466 /*
467  * Display zero values by default for xstats
468  */
469 uint8_t xstats_hide_zero;
470
471 unsigned int num_sockets = 0;
472 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
473
474 #ifdef RTE_LIBRTE_BITRATE
475 /* Bitrate statistics */
476 struct rte_stats_bitrates *bitrate_data;
477 lcoreid_t bitrate_lcore_id;
478 uint8_t bitrate_enabled;
479 #endif
480
481 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
482 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
483
484 /* Forward function declarations */
485 static void setup_attached_port(portid_t pi);
486 static void map_port_queue_stats_mapping_registers(portid_t pi,
487                                                    struct rte_port *port);
488 static void check_all_ports_link_status(uint32_t port_mask);
489 static int eth_event_callback(portid_t port_id,
490                               enum rte_eth_event_type type,
491                               void *param, void *ret_param);
492 static void dev_event_callback(const char *device_name,
493                                 enum rte_dev_event_type type,
494                                 void *param);
495
496 /*
497  * Check if all the ports are started.
498  * If yes, return positive value. If not, return zero.
499  */
500 static int all_ports_started(void);
501
502 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
503 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
504
505 /*
506  * Helper function to check if socket is already discovered.
507  * If yes, return positive value. If not, return zero.
508  */
509 int
510 new_socket_id(unsigned int socket_id)
511 {
512         unsigned int i;
513
514         for (i = 0; i < num_sockets; i++) {
515                 if (socket_ids[i] == socket_id)
516                         return 0;
517         }
518         return 1;
519 }
520
521 /*
522  * Setup default configuration.
523  */
524 static void
525 set_default_fwd_lcores_config(void)
526 {
527         unsigned int i;
528         unsigned int nb_lc;
529         unsigned int sock_num;
530
531         nb_lc = 0;
532         for (i = 0; i < RTE_MAX_LCORE; i++) {
533                 if (!rte_lcore_is_enabled(i))
534                         continue;
535                 sock_num = rte_lcore_to_socket_id(i);
536                 if (new_socket_id(sock_num)) {
537                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
538                                 rte_exit(EXIT_FAILURE,
539                                          "Total sockets greater than %u\n",
540                                          RTE_MAX_NUMA_NODES);
541                         }
542                         socket_ids[num_sockets++] = sock_num;
543                 }
544                 if (i == rte_get_master_lcore())
545                         continue;
546                 fwd_lcores_cpuids[nb_lc++] = i;
547         }
548         nb_lcores = (lcoreid_t) nb_lc;
549         nb_cfg_lcores = nb_lcores;
550         nb_fwd_lcores = 1;
551 }
552
553 static void
554 set_def_peer_eth_addrs(void)
555 {
556         portid_t i;
557
558         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
559                 peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
560                 peer_eth_addrs[i].addr_bytes[5] = i;
561         }
562 }
563
564 static void
565 set_default_fwd_ports_config(void)
566 {
567         portid_t pt_id;
568         int i = 0;
569
570         RTE_ETH_FOREACH_DEV(pt_id) {
571                 fwd_ports_ids[i++] = pt_id;
572
573                 /* Update sockets info according to the attached device */
574                 int socket_id = rte_eth_dev_socket_id(pt_id);
575                 if (socket_id >= 0 && new_socket_id(socket_id)) {
576                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
577                                 rte_exit(EXIT_FAILURE,
578                                          "Total sockets greater than %u\n",
579                                          RTE_MAX_NUMA_NODES);
580                         }
581                         socket_ids[num_sockets++] = socket_id;
582                 }
583         }
584
585         nb_cfg_ports = nb_ports;
586         nb_fwd_ports = nb_ports;
587 }
588
/*
 * Install the default forwarding configuration: lcores first, then the peer
 * Ethernet addresses, then ports. The lcore and port steps both append to
 * socket_ids[], so their relative order determines the order of that list.
 */
void
set_def_fwd_config(void)
{
        set_default_fwd_lcores_config();

        set_def_peer_eth_addrs();

        set_default_fwd_ports_config();
}
596
597 /* extremely pessimistic estimation of memory required to create a mempool */
598 static int
599 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
600 {
601         unsigned int n_pages, mbuf_per_pg, leftover;
602         uint64_t total_mem, mbuf_mem, obj_sz;
603
604         /* there is no good way to predict how much space the mempool will
605          * occupy because it will allocate chunks on the fly, and some of those
606          * will come from default DPDK memory while some will come from our
607          * external memory, so just assume 128MB will be enough for everyone.
608          */
609         uint64_t hdr_mem = 128 << 20;
610
611         /* account for possible non-contiguousness */
612         obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
613         if (obj_sz > pgsz) {
614                 TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
615                 return -1;
616         }
617
618         mbuf_per_pg = pgsz / obj_sz;
619         leftover = (nb_mbufs % mbuf_per_pg) > 0;
620         n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
621
622         mbuf_mem = n_pages * pgsz;
623
624         total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
625
626         if (total_mem > SIZE_MAX) {
627                 TESTPMD_LOG(ERR, "Memory size too big\n");
628                 return -1;
629         }
630         *out = (size_t)total_mem;
631
632         return 0;
633 }
634
635 static int
636 pagesz_flags(uint64_t page_sz)
637 {
638         /* as per mmap() manpage, all page sizes are log2 of page size
639          * shifted by MAP_HUGE_SHIFT
640          */
641         int log2 = rte_log2_u64(page_sz);
642
643         return (log2 << HUGE_SHIFT);
644 }
645
646 static void *
647 alloc_mem(size_t memsz, size_t pgsz, bool huge)
648 {
649         void *addr;
650         int flags;
651
652         /* allocate anonymous hugepages */
653         flags = MAP_ANONYMOUS | MAP_PRIVATE;
654         if (huge)
655                 flags |= HUGE_FLAG | pagesz_flags(pgsz);
656
657         addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
658         if (addr == MAP_FAILED)
659                 return NULL;
660
661         return addr;
662 }
663
664 struct extmem_param {
665         void *addr;
666         size_t len;
667         size_t pgsz;
668         rte_iova_t *iova_table;
669         unsigned int iova_table_len;
670 };
671
672 static int
673 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
674                 bool huge)
675 {
676         uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
677                         RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
678         unsigned int cur_page, n_pages, pgsz_idx;
679         size_t mem_sz, cur_pgsz;
680         rte_iova_t *iovas = NULL;
681         void *addr;
682         int ret;
683
684         for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
685                 /* skip anything that is too big */
686                 if (pgsizes[pgsz_idx] > SIZE_MAX)
687                         continue;
688
689                 cur_pgsz = pgsizes[pgsz_idx];
690
691                 /* if we were told not to allocate hugepages, override */
692                 if (!huge)
693                         cur_pgsz = sysconf(_SC_PAGESIZE);
694
695                 ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
696                 if (ret < 0) {
697                         TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
698                         return -1;
699                 }
700
701                 /* allocate our memory */
702                 addr = alloc_mem(mem_sz, cur_pgsz, huge);
703
704                 /* if we couldn't allocate memory with a specified page size,
705                  * that doesn't mean we can't do it with other page sizes, so
706                  * try another one.
707                  */
708                 if (addr == NULL)
709                         continue;
710
711                 /* store IOVA addresses for every page in this memory area */
712                 n_pages = mem_sz / cur_pgsz;
713
714                 iovas = malloc(sizeof(*iovas) * n_pages);
715
716                 if (iovas == NULL) {
717                         TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
718                         goto fail;
719                 }
720                 /* lock memory if it's not huge pages */
721                 if (!huge)
722                         mlock(addr, mem_sz);
723
724                 /* populate IOVA addresses */
725                 for (cur_page = 0; cur_page < n_pages; cur_page++) {
726                         rte_iova_t iova;
727                         size_t offset;
728                         void *cur;
729
730                         offset = cur_pgsz * cur_page;
731                         cur = RTE_PTR_ADD(addr, offset);
732
733                         /* touch the page before getting its IOVA */
734                         *(volatile char *)cur = 0;
735
736                         iova = rte_mem_virt2iova(cur);
737
738                         iovas[cur_page] = iova;
739                 }
740
741                 break;
742         }
743         /* if we couldn't allocate anything */
744         if (iovas == NULL)
745                 return -1;
746
747         param->addr = addr;
748         param->len = mem_sz;
749         param->pgsz = cur_pgsz;
750         param->iova_table = iovas;
751         param->iova_table_len = n_pages;
752
753         return 0;
754 fail:
755         if (iovas)
756                 free(iovas);
757         if (addr)
758                 munmap(addr, mem_sz);
759
760         return -1;
761 }
762
763 static int
764 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
765 {
766         struct extmem_param param;
767         int socket_id, ret;
768
769         memset(&param, 0, sizeof(param));
770
771         /* check if our heap exists */
772         socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
773         if (socket_id < 0) {
774                 /* create our heap */
775                 ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
776                 if (ret < 0) {
777                         TESTPMD_LOG(ERR, "Cannot create heap\n");
778                         return -1;
779                 }
780         }
781
782         ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
783         if (ret < 0) {
784                 TESTPMD_LOG(ERR, "Cannot create memory area\n");
785                 return -1;
786         }
787
788         /* we now have a valid memory area, so add it to heap */
789         ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
790                         param.addr, param.len, param.iova_table,
791                         param.iova_table_len, param.pgsz);
792
793         /* when using VFIO, memory is automatically mapped for DMA by EAL */
794
795         /* not needed any more */
796         free(param.iova_table);
797
798         if (ret < 0) {
799                 TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
800                 munmap(param.addr, param.len);
801                 return -1;
802         }
803
804         /* success */
805
806         TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
807                         param.len >> 20);
808
809         return 0;
810 }
811 static void
812 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
813              struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
814 {
815         uint16_t pid = 0;
816         int ret;
817
818         RTE_ETH_FOREACH_DEV(pid) {
819                 struct rte_eth_dev *dev =
820                         &rte_eth_devices[pid];
821
822                 ret = rte_dev_dma_unmap(dev->device, memhdr->addr, 0,
823                                         memhdr->len);
824                 if (ret) {
825                         TESTPMD_LOG(DEBUG,
826                                     "unable to DMA unmap addr 0x%p "
827                                     "for device %s\n",
828                                     memhdr->addr, dev->data->name);
829                 }
830         }
831         ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
832         if (ret) {
833                 TESTPMD_LOG(DEBUG,
834                             "unable to un-register addr 0x%p\n", memhdr->addr);
835         }
836 }
837
838 static void
839 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
840            struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
841 {
842         uint16_t pid = 0;
843         size_t page_size = sysconf(_SC_PAGESIZE);
844         int ret;
845
846         ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
847                                   page_size);
848         if (ret) {
849                 TESTPMD_LOG(DEBUG,
850                             "unable to register addr 0x%p\n", memhdr->addr);
851                 return;
852         }
853         RTE_ETH_FOREACH_DEV(pid) {
854                 struct rte_eth_dev *dev =
855                         &rte_eth_devices[pid];
856
857                 ret = rte_dev_dma_map(dev->device, memhdr->addr, 0,
858                                       memhdr->len);
859                 if (ret) {
860                         TESTPMD_LOG(DEBUG,
861                                     "unable to DMA map addr 0x%p "
862                                     "for device %s\n",
863                                     memhdr->addr, dev->data->name);
864                 }
865         }
866 }
867
868 /*
869  * Configuration initialisation done once at init time.
870  */
871 static struct rte_mempool *
872 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
873                  unsigned int socket_id)
874 {
875         char pool_name[RTE_MEMPOOL_NAMESIZE];
876         struct rte_mempool *rte_mp = NULL;
877         uint32_t mb_size;
878
879         mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
880         mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name));
881
882         TESTPMD_LOG(INFO,
883                 "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
884                 pool_name, nb_mbuf, mbuf_seg_size, socket_id);
885
886         switch (mp_alloc_type) {
887         case MP_ALLOC_NATIVE:
888                 {
889                         /* wrapper to rte_mempool_create() */
890                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
891                                         rte_mbuf_best_mempool_ops());
892                         rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
893                                 mb_mempool_cache, 0, mbuf_seg_size, socket_id);
894                         break;
895                 }
896         case MP_ALLOC_ANON:
897                 {
898                         rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
899                                 mb_size, (unsigned int) mb_mempool_cache,
900                                 sizeof(struct rte_pktmbuf_pool_private),
901                                 socket_id, mempool_flags);
902                         if (rte_mp == NULL)
903                                 goto err;
904
905                         if (rte_mempool_populate_anon(rte_mp) == 0) {
906                                 rte_mempool_free(rte_mp);
907                                 rte_mp = NULL;
908                                 goto err;
909                         }
910                         rte_pktmbuf_pool_init(rte_mp, NULL);
911                         rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
912                         rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
913                         break;
914                 }
915         case MP_ALLOC_XMEM:
916         case MP_ALLOC_XMEM_HUGE:
917                 {
918                         int heap_socket;
919                         bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
920
921                         if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
922                                 rte_exit(EXIT_FAILURE, "Could not create external memory\n");
923
924                         heap_socket =
925                                 rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
926                         if (heap_socket < 0)
927                                 rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
928
929                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
930                                         rte_mbuf_best_mempool_ops());
931                         rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
932                                         mb_mempool_cache, 0, mbuf_seg_size,
933                                         heap_socket);
934                         break;
935                 }
936         default:
937                 {
938                         rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
939                 }
940         }
941
942 err:
943         if (rte_mp == NULL) {
944                 rte_exit(EXIT_FAILURE,
945                         "Creation of mbuf pool for socket %u failed: %s\n",
946                         socket_id, rte_strerror(rte_errno));
947         } else if (verbose_level > 0) {
948                 rte_mempool_dump(stdout, rte_mp);
949         }
950         return rte_mp;
951 }
952
953 /*
954  * Check given socket id is valid or not with NUMA mode,
955  * if valid, return 0, else return -1
956  */
957 static int
958 check_socket_id(const unsigned int socket_id)
959 {
960         static int warning_once = 0;
961
962         if (new_socket_id(socket_id)) {
963                 if (!warning_once && numa_support)
964                         printf("Warning: NUMA should be configured manually by"
965                                " using --port-numa-config and"
966                                " --ring-numa-config parameters along with"
967                                " --numa.\n");
968                 warning_once = 1;
969                 return -1;
970         }
971         return 0;
972 }
973
974 /*
975  * Get the allowed maximum number of RX queues.
976  * *pid return the port id which has minimal value of
977  * max_rx_queues in all ports.
978  */
979 queueid_t
980 get_allowed_max_nb_rxq(portid_t *pid)
981 {
982         queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
983         bool max_rxq_valid = false;
984         portid_t pi;
985         struct rte_eth_dev_info dev_info;
986
987         RTE_ETH_FOREACH_DEV(pi) {
988                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
989                         continue;
990
991                 max_rxq_valid = true;
992                 if (dev_info.max_rx_queues < allowed_max_rxq) {
993                         allowed_max_rxq = dev_info.max_rx_queues;
994                         *pid = pi;
995                 }
996         }
997         return max_rxq_valid ? allowed_max_rxq : 0;
998 }
999
1000 /*
1001  * Check input rxq is valid or not.
1002  * If input rxq is not greater than any of maximum number
1003  * of RX queues of all ports, it is valid.
1004  * if valid, return 0, else return -1
1005  */
1006 int
1007 check_nb_rxq(queueid_t rxq)
1008 {
1009         queueid_t allowed_max_rxq;
1010         portid_t pid = 0;
1011
1012         allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1013         if (rxq > allowed_max_rxq) {
1014                 printf("Fail: input rxq (%u) can't be greater "
1015                        "than max_rx_queues (%u) of port %u\n",
1016                        rxq,
1017                        allowed_max_rxq,
1018                        pid);
1019                 return -1;
1020         }
1021         return 0;
1022 }
1023
1024 /*
1025  * Get the allowed maximum number of TX queues.
1026  * *pid return the port id which has minimal value of
1027  * max_tx_queues in all ports.
1028  */
1029 queueid_t
1030 get_allowed_max_nb_txq(portid_t *pid)
1031 {
1032         queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1033         bool max_txq_valid = false;
1034         portid_t pi;
1035         struct rte_eth_dev_info dev_info;
1036
1037         RTE_ETH_FOREACH_DEV(pi) {
1038                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1039                         continue;
1040
1041                 max_txq_valid = true;
1042                 if (dev_info.max_tx_queues < allowed_max_txq) {
1043                         allowed_max_txq = dev_info.max_tx_queues;
1044                         *pid = pi;
1045                 }
1046         }
1047         return max_txq_valid ? allowed_max_txq : 0;
1048 }
1049
1050 /*
1051  * Check input txq is valid or not.
1052  * If input txq is not greater than any of maximum number
1053  * of TX queues of all ports, it is valid.
1054  * if valid, return 0, else return -1
1055  */
1056 int
1057 check_nb_txq(queueid_t txq)
1058 {
1059         queueid_t allowed_max_txq;
1060         portid_t pid = 0;
1061
1062         allowed_max_txq = get_allowed_max_nb_txq(&pid);
1063         if (txq > allowed_max_txq) {
1064                 printf("Fail: input txq (%u) can't be greater "
1065                        "than max_tx_queues (%u) of port %u\n",
1066                        txq,
1067                        allowed_max_txq,
1068                        pid);
1069                 return -1;
1070         }
1071         return 0;
1072 }
1073
1074 /*
1075  * Get the allowed maximum number of hairpin queues.
1076  * *pid return the port id which has minimal value of
1077  * max_hairpin_queues in all ports.
1078  */
1079 queueid_t
1080 get_allowed_max_nb_hairpinq(portid_t *pid)
1081 {
1082         queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1083         portid_t pi;
1084         struct rte_eth_hairpin_cap cap;
1085
1086         RTE_ETH_FOREACH_DEV(pi) {
1087                 if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1088                         *pid = pi;
1089                         return 0;
1090                 }
1091                 if (cap.max_nb_queues < allowed_max_hairpinq) {
1092                         allowed_max_hairpinq = cap.max_nb_queues;
1093                         *pid = pi;
1094                 }
1095         }
1096         return allowed_max_hairpinq;
1097 }
1098
1099 /*
1100  * Check input hairpin is valid or not.
1101  * If input hairpin is not greater than any of maximum number
1102  * of hairpin queues of all ports, it is valid.
1103  * if valid, return 0, else return -1
1104  */
1105 int
1106 check_nb_hairpinq(queueid_t hairpinq)
1107 {
1108         queueid_t allowed_max_hairpinq;
1109         portid_t pid = 0;
1110
1111         allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1112         if (hairpinq > allowed_max_hairpinq) {
1113                 printf("Fail: input hairpin (%u) can't be greater "
1114                        "than max_hairpin_queues (%u) of port %u\n",
1115                        hairpinq, allowed_max_hairpinq, pid);
1116                 return -1;
1117         }
1118         return 0;
1119 }
1120
1121 static void
1122 init_config(void)
1123 {
1124         portid_t pid;
1125         struct rte_port *port;
1126         struct rte_mempool *mbp;
1127         unsigned int nb_mbuf_per_pool;
1128         lcoreid_t  lc_id;
1129         uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
1130         struct rte_gro_param gro_param;
1131         uint32_t gso_types;
1132         uint16_t data_size;
1133         bool warning = 0;
1134         int k;
1135         int ret;
1136
1137         memset(port_per_socket,0,RTE_MAX_NUMA_NODES);
1138
1139         /* Configuration of logical cores. */
1140         fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1141                                 sizeof(struct fwd_lcore *) * nb_lcores,
1142                                 RTE_CACHE_LINE_SIZE);
1143         if (fwd_lcores == NULL) {
1144                 rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1145                                                         "failed\n", nb_lcores);
1146         }
1147         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1148                 fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1149                                                sizeof(struct fwd_lcore),
1150                                                RTE_CACHE_LINE_SIZE);
1151                 if (fwd_lcores[lc_id] == NULL) {
1152                         rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1153                                                                 "failed\n");
1154                 }
1155                 fwd_lcores[lc_id]->cpuid_idx = lc_id;
1156         }
1157
1158         RTE_ETH_FOREACH_DEV(pid) {
1159                 port = &ports[pid];
1160                 /* Apply default TxRx configuration for all ports */
1161                 port->dev_conf.txmode = tx_mode;
1162                 port->dev_conf.rxmode = rx_mode;
1163
1164                 ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1165                 if (ret != 0)
1166                         rte_exit(EXIT_FAILURE,
1167                                  "rte_eth_dev_info_get() failed\n");
1168
1169                 if (!(port->dev_info.tx_offload_capa &
1170                       DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1171                         port->dev_conf.txmode.offloads &=
1172                                 ~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1173                 if (numa_support) {
1174                         if (port_numa[pid] != NUMA_NO_CONFIG)
1175                                 port_per_socket[port_numa[pid]]++;
1176                         else {
1177                                 uint32_t socket_id = rte_eth_dev_socket_id(pid);
1178
1179                                 /*
1180                                  * if socket_id is invalid,
1181                                  * set to the first available socket.
1182                                  */
1183                                 if (check_socket_id(socket_id) < 0)
1184                                         socket_id = socket_ids[0];
1185                                 port_per_socket[socket_id]++;
1186                         }
1187                 }
1188
1189                 /* Apply Rx offloads configuration */
1190                 for (k = 0; k < port->dev_info.max_rx_queues; k++)
1191                         port->rx_conf[k].offloads =
1192                                 port->dev_conf.rxmode.offloads;
1193                 /* Apply Tx offloads configuration */
1194                 for (k = 0; k < port->dev_info.max_tx_queues; k++)
1195                         port->tx_conf[k].offloads =
1196                                 port->dev_conf.txmode.offloads;
1197
1198                 /* set flag to initialize port/queue */
1199                 port->need_reconfig = 1;
1200                 port->need_reconfig_queues = 1;
1201                 port->tx_metadata = 0;
1202
1203                 /* Check for maximum number of segments per MTU. Accordingly
1204                  * update the mbuf data size.
1205                  */
1206                 if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1207                                 port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1208                         data_size = rx_mode.max_rx_pkt_len /
1209                                 port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1210
1211                         if ((data_size + RTE_PKTMBUF_HEADROOM) >
1212                                                         mbuf_data_size) {
1213                                 mbuf_data_size = data_size +
1214                                                  RTE_PKTMBUF_HEADROOM;
1215                                 warning = 1;
1216                         }
1217                 }
1218         }
1219
1220         if (warning)
1221                 TESTPMD_LOG(WARNING, "Configured mbuf size %hu\n",
1222                             mbuf_data_size);
1223
1224         /*
1225          * Create pools of mbuf.
1226          * If NUMA support is disabled, create a single pool of mbuf in
1227          * socket 0 memory by default.
1228          * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1229          *
1230          * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1231          * nb_txd can be configured at run time.
1232          */
1233         if (param_total_num_mbufs)
1234                 nb_mbuf_per_pool = param_total_num_mbufs;
1235         else {
1236                 nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1237                         (nb_lcores * mb_mempool_cache) +
1238                         RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1239                 nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1240         }
1241
1242         if (numa_support) {
1243                 uint8_t i;
1244
1245                 for (i = 0; i < num_sockets; i++)
1246                         mempools[i] = mbuf_pool_create(mbuf_data_size,
1247                                                        nb_mbuf_per_pool,
1248                                                        socket_ids[i]);
1249         } else {
1250                 if (socket_num == UMA_NO_CONFIG)
1251                         mempools[0] = mbuf_pool_create(mbuf_data_size,
1252                                                        nb_mbuf_per_pool, 0);
1253                 else
1254                         mempools[socket_num] = mbuf_pool_create
1255                                                         (mbuf_data_size,
1256                                                          nb_mbuf_per_pool,
1257                                                          socket_num);
1258         }
1259
1260         init_port_config();
1261
1262         gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1263                 DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1264         /*
1265          * Records which Mbuf pool to use by each logical core, if needed.
1266          */
1267         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1268                 mbp = mbuf_pool_find(
1269                         rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]));
1270
1271                 if (mbp == NULL)
1272                         mbp = mbuf_pool_find(0);
1273                 fwd_lcores[lc_id]->mbp = mbp;
1274                 /* initialize GSO context */
1275                 fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1276                 fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1277                 fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1278                 fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1279                         RTE_ETHER_CRC_LEN;
1280                 fwd_lcores[lc_id]->gso_ctx.flag = 0;
1281         }
1282
1283         /* Configuration of packet forwarding streams. */
1284         if (init_fwd_streams() < 0)
1285                 rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n");
1286
1287         fwd_config_setup();
1288
1289         /* create a gro context for each lcore */
1290         gro_param.gro_types = RTE_GRO_TCP_IPV4;
1291         gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1292         gro_param.max_item_per_flow = MAX_PKT_BURST;
1293         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1294                 gro_param.socket_id = rte_lcore_to_socket_id(
1295                                 fwd_lcores_cpuids[lc_id]);
1296                 fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1297                 if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1298                         rte_exit(EXIT_FAILURE,
1299                                         "rte_gro_ctx_create() failed\n");
1300                 }
1301         }
1302
1303 #if defined RTE_LIBRTE_PMD_SOFTNIC
1304         if (strcmp(cur_fwd_eng->fwd_mode_name, "softnic") == 0) {
1305                 RTE_ETH_FOREACH_DEV(pid) {
1306                         port = &ports[pid];
1307                         const char *driver = port->dev_info.driver_name;
1308
1309                         if (strcmp(driver, "net_softnic") == 0)
1310                                 port->softport.fwd_lcore_arg = fwd_lcores;
1311                 }
1312         }
1313 #endif
1314
1315 }
1316
1317
1318 void
1319 reconfig(portid_t new_port_id, unsigned socket_id)
1320 {
1321         struct rte_port *port;
1322         int ret;
1323
1324         /* Reconfiguration of Ethernet ports. */
1325         port = &ports[new_port_id];
1326
1327         ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info);
1328         if (ret != 0)
1329                 return;
1330
1331         /* set flag to initialize port/queue */
1332         port->need_reconfig = 1;
1333         port->need_reconfig_queues = 1;
1334         port->socket_id = socket_id;
1335
1336         init_port_config();
1337 }
1338
1339
1340 int
1341 init_fwd_streams(void)
1342 {
1343         portid_t pid;
1344         struct rte_port *port;
1345         streamid_t sm_id, nb_fwd_streams_new;
1346         queueid_t q;
1347
1348         /* set socket id according to numa or not */
1349         RTE_ETH_FOREACH_DEV(pid) {
1350                 port = &ports[pid];
1351                 if (nb_rxq > port->dev_info.max_rx_queues) {
1352                         printf("Fail: nb_rxq(%d) is greater than "
1353                                 "max_rx_queues(%d)\n", nb_rxq,
1354                                 port->dev_info.max_rx_queues);
1355                         return -1;
1356                 }
1357                 if (nb_txq > port->dev_info.max_tx_queues) {
1358                         printf("Fail: nb_txq(%d) is greater than "
1359                                 "max_tx_queues(%d)\n", nb_txq,
1360                                 port->dev_info.max_tx_queues);
1361                         return -1;
1362                 }
1363                 if (numa_support) {
1364                         if (port_numa[pid] != NUMA_NO_CONFIG)
1365                                 port->socket_id = port_numa[pid];
1366                         else {
1367                                 port->socket_id = rte_eth_dev_socket_id(pid);
1368
1369                                 /*
1370                                  * if socket_id is invalid,
1371                                  * set to the first available socket.
1372                                  */
1373                                 if (check_socket_id(port->socket_id) < 0)
1374                                         port->socket_id = socket_ids[0];
1375                         }
1376                 }
1377                 else {
1378                         if (socket_num == UMA_NO_CONFIG)
1379                                 port->socket_id = 0;
1380                         else
1381                                 port->socket_id = socket_num;
1382                 }
1383         }
1384
1385         q = RTE_MAX(nb_rxq, nb_txq);
1386         if (q == 0) {
1387                 printf("Fail: Cannot allocate fwd streams as number of queues is 0\n");
1388                 return -1;
1389         }
1390         nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1391         if (nb_fwd_streams_new == nb_fwd_streams)
1392                 return 0;
1393         /* clear the old */
1394         if (fwd_streams != NULL) {
1395                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1396                         if (fwd_streams[sm_id] == NULL)
1397                                 continue;
1398                         rte_free(fwd_streams[sm_id]);
1399                         fwd_streams[sm_id] = NULL;
1400                 }
1401                 rte_free(fwd_streams);
1402                 fwd_streams = NULL;
1403         }
1404
1405         /* init new */
1406         nb_fwd_streams = nb_fwd_streams_new;
1407         if (nb_fwd_streams) {
1408                 fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1409                         sizeof(struct fwd_stream *) * nb_fwd_streams,
1410                         RTE_CACHE_LINE_SIZE);
1411                 if (fwd_streams == NULL)
1412                         rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1413                                  " (struct fwd_stream *)) failed\n",
1414                                  nb_fwd_streams);
1415
1416                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1417                         fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1418                                 " struct fwd_stream", sizeof(struct fwd_stream),
1419                                 RTE_CACHE_LINE_SIZE);
1420                         if (fwd_streams[sm_id] == NULL)
1421                                 rte_exit(EXIT_FAILURE, "rte_zmalloc"
1422                                          "(struct fwd_stream) failed\n");
1423                 }
1424         }
1425
1426         return 0;
1427 }
1428
#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
/*
 * Print a one-line summary of a burst-size histogram.
 *
 * rx_tx is the label placed in front of "-bursts" and pbs holds the
 * histogram: pkt_burst_spread[n] is the number of bursts of n packets.
 * The line shows the total number of bursts, the share of the most
 * frequent burst size and, when relevant, the share of the second most
 * frequent size and of all remaining sizes. Nothing is printed when no
 * burst was recorded.
 */
static void
pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
{
        unsigned int total_burst;
        unsigned int nb_burst;
        unsigned int burst_stats[2];
        uint16_t pktnb_stats[2];
        uint16_t nb_pkt;
        int burst_percent[3];

        /*
         * First compute the total number of packet bursts and the
         * two highest numbers of bursts of the same number of packets.
         */
        total_burst = 0;
        burst_stats[0] = burst_stats[1] = 0;
        pktnb_stats[0] = pktnb_stats[1] = 0;
        for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
                nb_burst = pbs->pkt_burst_spread[nb_pkt];
                if (nb_burst == 0)
                        continue;
                total_burst += nb_burst;
                if (nb_burst > burst_stats[0]) {
                        /* New leader: the old leader becomes runner-up. */
                        burst_stats[1] = burst_stats[0];
                        pktnb_stats[1] = pktnb_stats[0];
                        burst_stats[0] = nb_burst;
                        pktnb_stats[0] = nb_pkt;
                } else if (nb_burst > burst_stats[1]) {
                        burst_stats[1] = nb_burst;
                        pktnb_stats[1] = nb_pkt;
                }
        }
        if (total_burst == 0)
                return;

        /*
         * Multiply in 64 bits: "count * 100" wraps in unsigned int once a
         * bucket exceeds roughly 42.9 million bursts, which would yield a
         * bogus percentage.
         */
        burst_percent[0] =
                (int)(((uint64_t)burst_stats[0] * 100) / total_burst);
        printf("  %s-bursts : %u [%d%% of %d pkts", rx_tx, total_burst,
               burst_percent[0], (int) pktnb_stats[0]);
        if (burst_stats[0] == total_burst) {
                printf("]\n");
                return;
        }
        if (burst_stats[0] + burst_stats[1] == total_burst) {
                printf(" + %d%% of %d pkts]\n",
                       100 - burst_percent[0], (int) pktnb_stats[1]);
                return;
        }
        burst_percent[1] =
                (int)(((uint64_t)burst_stats[1] * 100) / total_burst);
        burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]);
        if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) {
                /* A share rounding to 0% is folded into "others". */
                printf(" + %d%% of others]\n", 100 - burst_percent[0]);
                return;
        }
        printf(" + %d%% of %d pkts + %d%% of others]\n",
               burst_percent[1], (int) pktnb_stats[1], burst_percent[2]);
}
#endif /* RTE_TEST_PMD_RECORD_BURST_STATS */
1486
1487 static void
1488 fwd_stream_stats_display(streamid_t stream_id)
1489 {
1490         struct fwd_stream *fs;
1491         static const char *fwd_top_stats_border = "-------";
1492
1493         fs = fwd_streams[stream_id];
1494         if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1495             (fs->fwd_dropped == 0))
1496                 return;
1497         printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1498                "TX Port=%2d/Queue=%2d %s\n",
1499                fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1500                fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1501         printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1502                " TX-dropped: %-14"PRIu64,
1503                fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1504
1505         /* if checksum mode */
1506         if (cur_fwd_eng == &csum_fwd_engine) {
1507                 printf("  RX- bad IP checksum: %-14"PRIu64
1508                        "  Rx- bad L4 checksum: %-14"PRIu64
1509                        " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1510                         fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1511                         fs->rx_bad_outer_l4_csum);
1512         } else {
1513                 printf("\n");
1514         }
1515
1516 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1517         pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1518         pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1519 #endif
1520 }
1521
1522 void
1523 fwd_stats_display(void)
1524 {
1525         static const char *fwd_stats_border = "----------------------";
1526         static const char *acc_stats_border = "+++++++++++++++";
1527         struct {
1528                 struct fwd_stream *rx_stream;
1529                 struct fwd_stream *tx_stream;
1530                 uint64_t tx_dropped;
1531                 uint64_t rx_bad_ip_csum;
1532                 uint64_t rx_bad_l4_csum;
1533                 uint64_t rx_bad_outer_l4_csum;
1534         } ports_stats[RTE_MAX_ETHPORTS];
1535         uint64_t total_rx_dropped = 0;
1536         uint64_t total_tx_dropped = 0;
1537         uint64_t total_rx_nombuf = 0;
1538         struct rte_eth_stats stats;
1539 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1540         uint64_t fwd_cycles = 0;
1541 #endif
1542         uint64_t total_recv = 0;
1543         uint64_t total_xmit = 0;
1544         struct rte_port *port;
1545         streamid_t sm_id;
1546         portid_t pt_id;
1547         int i;
1548
1549         memset(ports_stats, 0, sizeof(ports_stats));
1550
1551         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1552                 struct fwd_stream *fs = fwd_streams[sm_id];
1553
1554                 if (cur_fwd_config.nb_fwd_streams >
1555                     cur_fwd_config.nb_fwd_ports) {
1556                         fwd_stream_stats_display(sm_id);
1557                 } else {
1558                         ports_stats[fs->tx_port].tx_stream = fs;
1559                         ports_stats[fs->rx_port].rx_stream = fs;
1560                 }
1561
1562                 ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1563
1564                 ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1565                 ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1566                 ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1567                                 fs->rx_bad_outer_l4_csum;
1568
1569 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1570                 fwd_cycles += fs->core_cycles;
1571 #endif
1572         }
1573         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1574                 uint8_t j;
1575
1576                 pt_id = fwd_ports_ids[i];
1577                 port = &ports[pt_id];
1578
1579                 rte_eth_stats_get(pt_id, &stats);
1580                 stats.ipackets -= port->stats.ipackets;
1581                 stats.opackets -= port->stats.opackets;
1582                 stats.ibytes -= port->stats.ibytes;
1583                 stats.obytes -= port->stats.obytes;
1584                 stats.imissed -= port->stats.imissed;
1585                 stats.oerrors -= port->stats.oerrors;
1586                 stats.rx_nombuf -= port->stats.rx_nombuf;
1587
1588                 total_recv += stats.ipackets;
1589                 total_xmit += stats.opackets;
1590                 total_rx_dropped += stats.imissed;
1591                 total_tx_dropped += ports_stats[pt_id].tx_dropped;
1592                 total_tx_dropped += stats.oerrors;
1593                 total_rx_nombuf  += stats.rx_nombuf;
1594
1595                 printf("\n  %s Forward statistics for port %-2d %s\n",
1596                        fwd_stats_border, pt_id, fwd_stats_border);
1597
1598                 if (!port->rx_queue_stats_mapping_enabled &&
1599                     !port->tx_queue_stats_mapping_enabled) {
1600                         printf("  RX-packets: %-14"PRIu64
1601                                " RX-dropped: %-14"PRIu64
1602                                "RX-total: %-"PRIu64"\n",
1603                                stats.ipackets, stats.imissed,
1604                                stats.ipackets + stats.imissed);
1605
1606                         if (cur_fwd_eng == &csum_fwd_engine)
1607                                 printf("  Bad-ipcsum: %-14"PRIu64
1608                                        " Bad-l4csum: %-14"PRIu64
1609                                        "Bad-outer-l4csum: %-14"PRIu64"\n",
1610                                        ports_stats[pt_id].rx_bad_ip_csum,
1611                                        ports_stats[pt_id].rx_bad_l4_csum,
1612                                        ports_stats[pt_id].rx_bad_outer_l4_csum);
1613                         if (stats.ierrors + stats.rx_nombuf > 0) {
1614                                 printf("  RX-error: %-"PRIu64"\n",
1615                                        stats.ierrors);
1616                                 printf("  RX-nombufs: %-14"PRIu64"\n",
1617                                        stats.rx_nombuf);
1618                         }
1619
1620                         printf("  TX-packets: %-14"PRIu64
1621                                " TX-dropped: %-14"PRIu64
1622                                "TX-total: %-"PRIu64"\n",
1623                                stats.opackets, ports_stats[pt_id].tx_dropped,
1624                                stats.opackets + ports_stats[pt_id].tx_dropped);
1625                 } else {
1626                         printf("  RX-packets:             %14"PRIu64
1627                                "    RX-dropped:%14"PRIu64
1628                                "    RX-total:%14"PRIu64"\n",
1629                                stats.ipackets, stats.imissed,
1630                                stats.ipackets + stats.imissed);
1631
1632                         if (cur_fwd_eng == &csum_fwd_engine)
1633                                 printf("  Bad-ipcsum:%14"PRIu64
1634                                        "    Bad-l4csum:%14"PRIu64
1635                                        "    Bad-outer-l4csum: %-14"PRIu64"\n",
1636                                        ports_stats[pt_id].rx_bad_ip_csum,
1637                                        ports_stats[pt_id].rx_bad_l4_csum,
1638                                        ports_stats[pt_id].rx_bad_outer_l4_csum);
1639                         if ((stats.ierrors + stats.rx_nombuf) > 0) {
1640                                 printf("  RX-error:%"PRIu64"\n", stats.ierrors);
1641                                 printf("  RX-nombufs:             %14"PRIu64"\n",
1642                                        stats.rx_nombuf);
1643                         }
1644
1645                         printf("  TX-packets:             %14"PRIu64
1646                                "    TX-dropped:%14"PRIu64
1647                                "    TX-total:%14"PRIu64"\n",
1648                                stats.opackets, ports_stats[pt_id].tx_dropped,
1649                                stats.opackets + ports_stats[pt_id].tx_dropped);
1650                 }
1651
1652 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1653                 if (ports_stats[pt_id].rx_stream)
1654                         pkt_burst_stats_display("RX",
1655                                 &ports_stats[pt_id].rx_stream->rx_burst_stats);
1656                 if (ports_stats[pt_id].tx_stream)
1657                         pkt_burst_stats_display("TX",
1658                                 &ports_stats[pt_id].tx_stream->tx_burst_stats);
1659 #endif
1660
1661                 if (port->rx_queue_stats_mapping_enabled) {
1662                         printf("\n");
1663                         for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1664                                 printf("  Stats reg %2d RX-packets:%14"PRIu64
1665                                        "     RX-errors:%14"PRIu64
1666                                        "    RX-bytes:%14"PRIu64"\n",
1667                                        j, stats.q_ipackets[j],
1668                                        stats.q_errors[j], stats.q_ibytes[j]);
1669                         }
1670                         printf("\n");
1671                 }
1672                 if (port->tx_queue_stats_mapping_enabled) {
1673                         for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1674                                 printf("  Stats reg %2d TX-packets:%14"PRIu64
1675                                        "                                 TX-bytes:%14"
1676                                        PRIu64"\n",
1677                                        j, stats.q_opackets[j],
1678                                        stats.q_obytes[j]);
1679                         }
1680                 }
1681
1682                 printf("  %s--------------------------------%s\n",
1683                        fwd_stats_border, fwd_stats_border);
1684         }
1685
1686         printf("\n  %s Accumulated forward statistics for all ports"
1687                "%s\n",
1688                acc_stats_border, acc_stats_border);
1689         printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
1690                "%-"PRIu64"\n"
1691                "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
1692                "%-"PRIu64"\n",
1693                total_recv, total_rx_dropped, total_recv + total_rx_dropped,
1694                total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
1695         if (total_rx_nombuf > 0)
1696                 printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
1697         printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
1698                "%s\n",
1699                acc_stats_border, acc_stats_border);
1700 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1701         if (total_recv > 0)
1702                 printf("\n  CPU cycles/packet=%u (total cycles="
1703                        "%"PRIu64" / total RX packets=%"PRIu64")\n",
1704                        (unsigned int)(fwd_cycles / total_recv),
1705                        fwd_cycles, total_recv);
1706 #endif
1707 }
1708
1709 void
1710 fwd_stats_reset(void)
1711 {
1712         streamid_t sm_id;
1713         portid_t pt_id;
1714         int i;
1715
1716         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1717                 pt_id = fwd_ports_ids[i];
1718                 rte_eth_stats_get(pt_id, &ports[pt_id].stats);
1719         }
1720         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1721                 struct fwd_stream *fs = fwd_streams[sm_id];
1722
1723                 fs->rx_packets = 0;
1724                 fs->tx_packets = 0;
1725                 fs->fwd_dropped = 0;
1726                 fs->rx_bad_ip_csum = 0;
1727                 fs->rx_bad_l4_csum = 0;
1728                 fs->rx_bad_outer_l4_csum = 0;
1729
1730 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1731                 memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
1732                 memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
1733 #endif
1734 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1735                 fs->core_cycles = 0;
1736 #endif
1737         }
1738 }
1739
1740 static void
1741 flush_fwd_rx_queues(void)
1742 {
1743         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1744         portid_t  rxp;
1745         portid_t port_id;
1746         queueid_t rxq;
1747         uint16_t  nb_rx;
1748         uint16_t  i;
1749         uint8_t   j;
1750         uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
1751         uint64_t timer_period;
1752
1753         /* convert to number of cycles */
1754         timer_period = rte_get_timer_hz(); /* 1 second timeout */
1755
1756         for (j = 0; j < 2; j++) {
1757                 for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
1758                         for (rxq = 0; rxq < nb_rxq; rxq++) {
1759                                 port_id = fwd_ports_ids[rxp];
1760                                 /**
1761                                 * testpmd can stuck in the below do while loop
1762                                 * if rte_eth_rx_burst() always returns nonzero
1763                                 * packets. So timer is added to exit this loop
1764                                 * after 1sec timer expiry.
1765                                 */
1766                                 prev_tsc = rte_rdtsc();
1767                                 do {
1768                                         nb_rx = rte_eth_rx_burst(port_id, rxq,
1769                                                 pkts_burst, MAX_PKT_BURST);
1770                                         for (i = 0; i < nb_rx; i++)
1771                                                 rte_pktmbuf_free(pkts_burst[i]);
1772
1773                                         cur_tsc = rte_rdtsc();
1774                                         diff_tsc = cur_tsc - prev_tsc;
1775                                         timer_tsc += diff_tsc;
1776                                 } while ((nb_rx > 0) &&
1777                                         (timer_tsc < timer_period));
1778                                 timer_tsc = 0;
1779                         }
1780                 }
1781                 rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
1782         }
1783 }
1784
1785 static void
1786 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
1787 {
1788         struct fwd_stream **fsm;
1789         streamid_t nb_fs;
1790         streamid_t sm_id;
1791 #ifdef RTE_LIBRTE_BITRATE
1792         uint64_t tics_per_1sec;
1793         uint64_t tics_datum;
1794         uint64_t tics_current;
1795         uint16_t i, cnt_ports;
1796
1797         cnt_ports = nb_ports;
1798         tics_datum = rte_rdtsc();
1799         tics_per_1sec = rte_get_timer_hz();
1800 #endif
1801         fsm = &fwd_streams[fc->stream_idx];
1802         nb_fs = fc->stream_nb;
1803         do {
1804                 for (sm_id = 0; sm_id < nb_fs; sm_id++)
1805                         (*pkt_fwd)(fsm[sm_id]);
1806 #ifdef RTE_LIBRTE_BITRATE
1807                 if (bitrate_enabled != 0 &&
1808                                 bitrate_lcore_id == rte_lcore_id()) {
1809                         tics_current = rte_rdtsc();
1810                         if (tics_current - tics_datum >= tics_per_1sec) {
1811                                 /* Periodic bitrate calculation */
1812                                 for (i = 0; i < cnt_ports; i++)
1813                                         rte_stats_bitrate_calc(bitrate_data,
1814                                                 ports_ids[i]);
1815                                 tics_datum = tics_current;
1816                         }
1817                 }
1818 #endif
1819 #ifdef RTE_LIBRTE_LATENCY_STATS
1820                 if (latencystats_enabled != 0 &&
1821                                 latencystats_lcore_id == rte_lcore_id())
1822                         rte_latencystats_update();
1823 #endif
1824
1825         } while (! fc->stopped);
1826 }
1827
1828 static int
1829 start_pkt_forward_on_core(void *fwd_arg)
1830 {
1831         run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
1832                              cur_fwd_config.fwd_eng->packet_fwd);
1833         return 0;
1834 }
1835
1836 /*
1837  * Run the TXONLY packet forwarding engine to send a single burst of packets.
1838  * Used to start communication flows in network loopback test configurations.
1839  */
1840 static int
1841 run_one_txonly_burst_on_core(void *fwd_arg)
1842 {
1843         struct fwd_lcore *fwd_lc;
1844         struct fwd_lcore tmp_lcore;
1845
1846         fwd_lc = (struct fwd_lcore *) fwd_arg;
1847         tmp_lcore = *fwd_lc;
1848         tmp_lcore.stopped = 1;
1849         run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
1850         return 0;
1851 }
1852
1853 /*
1854  * Launch packet forwarding:
1855  *     - Setup per-port forwarding context.
1856  *     - launch logical cores with their forwarding configuration.
1857  */
1858 static void
1859 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
1860 {
1861         port_fwd_begin_t port_fwd_begin;
1862         unsigned int i;
1863         unsigned int lc_id;
1864         int diag;
1865
1866         port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
1867         if (port_fwd_begin != NULL) {
1868                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
1869                         (*port_fwd_begin)(fwd_ports_ids[i]);
1870         }
1871         for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
1872                 lc_id = fwd_lcores_cpuids[i];
1873                 if ((interactive == 0) || (lc_id != rte_lcore_id())) {
1874                         fwd_lcores[i]->stopped = 0;
1875                         diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
1876                                                      fwd_lcores[i], lc_id);
1877                         if (diag != 0)
1878                                 printf("launch lcore %u failed - diag=%d\n",
1879                                        lc_id, diag);
1880                 }
1881         }
1882 }
1883
1884 /*
1885  * Launch packet forwarding configuration.
1886  */
1887 void
1888 start_packet_forwarding(int with_tx_first)
1889 {
1890         port_fwd_begin_t port_fwd_begin;
1891         port_fwd_end_t  port_fwd_end;
1892         struct rte_port *port;
1893         unsigned int i;
1894         portid_t   pt_id;
1895
1896         if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
1897                 rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
1898
1899         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
1900                 rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
1901
1902         if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
1903                 strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
1904                 (!nb_rxq || !nb_txq))
1905                 rte_exit(EXIT_FAILURE,
1906                         "Either rxq or txq are 0, cannot use %s fwd mode\n",
1907                         cur_fwd_eng->fwd_mode_name);
1908
1909         if (all_ports_started() == 0) {
1910                 printf("Not all ports were started\n");
1911                 return;
1912         }
1913         if (test_done == 0) {
1914                 printf("Packet forwarding already started\n");
1915                 return;
1916         }
1917
1918
1919         if(dcb_test) {
1920                 for (i = 0; i < nb_fwd_ports; i++) {
1921                         pt_id = fwd_ports_ids[i];
1922                         port = &ports[pt_id];
1923                         if (!port->dcb_flag) {
1924                                 printf("In DCB mode, all forwarding ports must "
1925                                        "be configured in this mode.\n");
1926                                 return;
1927                         }
1928                 }
1929                 if (nb_fwd_lcores == 1) {
1930                         printf("In DCB mode,the nb forwarding cores "
1931                                "should be larger than 1.\n");
1932                         return;
1933                 }
1934         }
1935         test_done = 0;
1936
1937         fwd_config_setup();
1938
1939         if(!no_flush_rx)
1940                 flush_fwd_rx_queues();
1941
1942         pkt_fwd_config_display(&cur_fwd_config);
1943         rxtx_config_display();
1944
1945         fwd_stats_reset();
1946         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1947                 pt_id = fwd_ports_ids[i];
1948                 port = &ports[pt_id];
1949                 map_port_queue_stats_mapping_registers(pt_id, port);
1950         }
1951         if (with_tx_first) {
1952                 port_fwd_begin = tx_only_engine.port_fwd_begin;
1953                 if (port_fwd_begin != NULL) {
1954                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
1955                                 (*port_fwd_begin)(fwd_ports_ids[i]);
1956                 }
1957                 while (with_tx_first--) {
1958                         launch_packet_forwarding(
1959                                         run_one_txonly_burst_on_core);
1960                         rte_eal_mp_wait_lcore();
1961                 }
1962                 port_fwd_end = tx_only_engine.port_fwd_end;
1963                 if (port_fwd_end != NULL) {
1964                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
1965                                 (*port_fwd_end)(fwd_ports_ids[i]);
1966                 }
1967         }
1968         launch_packet_forwarding(start_pkt_forward_on_core);
1969 }
1970
1971 void
1972 stop_packet_forwarding(void)
1973 {
1974         port_fwd_end_t port_fwd_end;
1975         lcoreid_t lc_id;
1976         portid_t pt_id;
1977         int i;
1978
1979         if (test_done) {
1980                 printf("Packet forwarding not started\n");
1981                 return;
1982         }
1983         printf("Telling cores to stop...");
1984         for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
1985                 fwd_lcores[lc_id]->stopped = 1;
1986         printf("\nWaiting for lcores to finish...\n");
1987         rte_eal_mp_wait_lcore();
1988         port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
1989         if (port_fwd_end != NULL) {
1990                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1991                         pt_id = fwd_ports_ids[i];
1992                         (*port_fwd_end)(pt_id);
1993                 }
1994         }
1995
1996         fwd_stats_display();
1997
1998         printf("\nDone.\n");
1999         test_done = 1;
2000 }
2001
2002 void
2003 dev_set_link_up(portid_t pid)
2004 {
2005         if (rte_eth_dev_set_link_up(pid) < 0)
2006                 printf("\nSet link up fail.\n");
2007 }
2008
2009 void
2010 dev_set_link_down(portid_t pid)
2011 {
2012         if (rte_eth_dev_set_link_down(pid) < 0)
2013                 printf("\nSet link down fail.\n");
2014 }
2015
2016 static int
2017 all_ports_started(void)
2018 {
2019         portid_t pi;
2020         struct rte_port *port;
2021
2022         RTE_ETH_FOREACH_DEV(pi) {
2023                 port = &ports[pi];
2024                 /* Check if there is a port which is not started */
2025                 if ((port->port_status != RTE_PORT_STARTED) &&
2026                         (port->slave_flag == 0))
2027                         return 0;
2028         }
2029
2030         /* No port is not started */
2031         return 1;
2032 }
2033
2034 int
2035 port_is_stopped(portid_t port_id)
2036 {
2037         struct rte_port *port = &ports[port_id];
2038
2039         if ((port->port_status != RTE_PORT_STOPPED) &&
2040             (port->slave_flag == 0))
2041                 return 0;
2042         return 1;
2043 }
2044
2045 int
2046 all_ports_stopped(void)
2047 {
2048         portid_t pi;
2049
2050         RTE_ETH_FOREACH_DEV(pi) {
2051                 if (!port_is_stopped(pi))
2052                         return 0;
2053         }
2054
2055         return 1;
2056 }
2057
2058 int
2059 port_is_started(portid_t port_id)
2060 {
2061         if (port_id_is_invalid(port_id, ENABLED_WARN))
2062                 return 0;
2063
2064         if (ports[port_id].port_status != RTE_PORT_STARTED)
2065                 return 0;
2066
2067         return 1;
2068 }
2069
2070 /* Configure the Rx and Tx hairpin queues for the selected port. */
2071 static int
2072 setup_hairpin_queues(portid_t pi)
2073 {
2074         queueid_t qi;
2075         struct rte_eth_hairpin_conf hairpin_conf = {
2076                 .peer_count = 1,
2077         };
2078         int i;
2079         int diag;
2080         struct rte_port *port = &ports[pi];
2081
2082         for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2083                 hairpin_conf.peers[0].port = pi;
2084                 hairpin_conf.peers[0].queue = i + nb_rxq;
2085                 diag = rte_eth_tx_hairpin_queue_setup
2086                         (pi, qi, nb_txd, &hairpin_conf);
2087                 i++;
2088                 if (diag == 0)
2089                         continue;
2090
2091                 /* Fail to setup rx queue, return */
2092                 if (rte_atomic16_cmpset(&(port->port_status),
2093                                         RTE_PORT_HANDLING,
2094                                         RTE_PORT_STOPPED) == 0)
2095                         printf("Port %d can not be set back "
2096                                         "to stopped\n", pi);
2097                 printf("Fail to configure port %d hairpin "
2098                                 "queues\n", pi);
2099                 /* try to reconfigure queues next time */
2100                 port->need_reconfig_queues = 1;
2101                 return -1;
2102         }
2103         for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2104                 hairpin_conf.peers[0].port = pi;
2105                 hairpin_conf.peers[0].queue = i + nb_txq;
2106                 diag = rte_eth_rx_hairpin_queue_setup
2107                         (pi, qi, nb_rxd, &hairpin_conf);
2108                 i++;
2109                 if (diag == 0)
2110                         continue;
2111
2112                 /* Fail to setup rx queue, return */
2113                 if (rte_atomic16_cmpset(&(port->port_status),
2114                                         RTE_PORT_HANDLING,
2115                                         RTE_PORT_STOPPED) == 0)
2116                         printf("Port %d can not be set back "
2117                                         "to stopped\n", pi);
2118                 printf("Fail to configure port %d hairpin "
2119                                 "queues\n", pi);
2120                 /* try to reconfigure queues next time */
2121                 port->need_reconfig_queues = 1;
2122                 return -1;
2123         }
2124         return 0;
2125 }
2126
2127 int
2128 start_port(portid_t pid)
2129 {
2130         int diag, need_check_link_status = -1;
2131         portid_t pi;
2132         queueid_t qi;
2133         struct rte_port *port;
2134         struct rte_ether_addr mac_addr;
2135         struct rte_eth_hairpin_cap cap;
2136
2137         if (port_id_is_invalid(pid, ENABLED_WARN))
2138                 return 0;
2139
2140         if(dcb_config)
2141                 dcb_test = 1;
2142         RTE_ETH_FOREACH_DEV(pi) {
2143                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2144                         continue;
2145
2146                 need_check_link_status = 0;
2147                 port = &ports[pi];
2148                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2149                                                  RTE_PORT_HANDLING) == 0) {
2150                         printf("Port %d is now not stopped\n", pi);
2151                         continue;
2152                 }
2153
2154                 if (port->need_reconfig > 0) {
2155                         port->need_reconfig = 0;
2156
2157                         if (flow_isolate_all) {
2158                                 int ret = port_flow_isolate(pi, 1);
2159                                 if (ret) {
2160                                         printf("Failed to apply isolated"
2161                                                " mode on port %d\n", pi);
2162                                         return -1;
2163                                 }
2164                         }
2165                         configure_rxtx_dump_callbacks(0);
2166                         printf("Configuring Port %d (socket %u)\n", pi,
2167                                         port->socket_id);
2168                         if (nb_hairpinq > 0 &&
2169                             rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2170                                 printf("Port %d doesn't support hairpin "
2171                                        "queues\n", pi);
2172                                 return -1;
2173                         }
2174                         /* configure port */
2175                         diag = rte_eth_dev_configure(pi, nb_rxq + nb_hairpinq,
2176                                                      nb_txq + nb_hairpinq,
2177                                                      &(port->dev_conf));
2178                         if (diag != 0) {
2179                                 if (rte_atomic16_cmpset(&(port->port_status),
2180                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2181                                         printf("Port %d can not be set back "
2182                                                         "to stopped\n", pi);
2183                                 printf("Fail to configure port %d\n", pi);
2184                                 /* try to reconfigure port next time */
2185                                 port->need_reconfig = 1;
2186                                 return -1;
2187                         }
2188                 }
2189                 if (port->need_reconfig_queues > 0) {
2190                         port->need_reconfig_queues = 0;
2191                         /* setup tx queues */
2192                         for (qi = 0; qi < nb_txq; qi++) {
2193                                 if ((numa_support) &&
2194                                         (txring_numa[pi] != NUMA_NO_CONFIG))
2195                                         diag = rte_eth_tx_queue_setup(pi, qi,
2196                                                 port->nb_tx_desc[qi],
2197                                                 txring_numa[pi],
2198                                                 &(port->tx_conf[qi]));
2199                                 else
2200                                         diag = rte_eth_tx_queue_setup(pi, qi,
2201                                                 port->nb_tx_desc[qi],
2202                                                 port->socket_id,
2203                                                 &(port->tx_conf[qi]));
2204
2205                                 if (diag == 0)
2206                                         continue;
2207
2208                                 /* Fail to setup tx queue, return */
2209                                 if (rte_atomic16_cmpset(&(port->port_status),
2210                                                         RTE_PORT_HANDLING,
2211                                                         RTE_PORT_STOPPED) == 0)
2212                                         printf("Port %d can not be set back "
2213                                                         "to stopped\n", pi);
2214                                 printf("Fail to configure port %d tx queues\n",
2215                                        pi);
2216                                 /* try to reconfigure queues next time */
2217                                 port->need_reconfig_queues = 1;
2218                                 return -1;
2219                         }
2220                         for (qi = 0; qi < nb_rxq; qi++) {
2221                                 /* setup rx queues */
2222                                 if ((numa_support) &&
2223                                         (rxring_numa[pi] != NUMA_NO_CONFIG)) {
2224                                         struct rte_mempool * mp =
2225                                                 mbuf_pool_find(rxring_numa[pi]);
2226                                         if (mp == NULL) {
2227                                                 printf("Failed to setup RX queue:"
2228                                                         "No mempool allocation"
2229                                                         " on the socket %d\n",
2230                                                         rxring_numa[pi]);
2231                                                 return -1;
2232                                         }
2233
2234                                         diag = rte_eth_rx_queue_setup(pi, qi,
2235                                              port->nb_rx_desc[qi],
2236                                              rxring_numa[pi],
2237                                              &(port->rx_conf[qi]),
2238                                              mp);
2239                                 } else {
2240                                         struct rte_mempool *mp =
2241                                                 mbuf_pool_find(port->socket_id);
2242                                         if (mp == NULL) {
2243                                                 printf("Failed to setup RX queue:"
2244                                                         "No mempool allocation"
2245                                                         " on the socket %d\n",
2246                                                         port->socket_id);
2247                                                 return -1;
2248                                         }
2249                                         diag = rte_eth_rx_queue_setup(pi, qi,
2250                                              port->nb_rx_desc[qi],
2251                                              port->socket_id,
2252                                              &(port->rx_conf[qi]),
2253                                              mp);
2254                                 }
2255                                 if (diag == 0)
2256                                         continue;
2257
2258                                 /* Fail to setup rx queue, return */
2259                                 if (rte_atomic16_cmpset(&(port->port_status),
2260                                                         RTE_PORT_HANDLING,
2261                                                         RTE_PORT_STOPPED) == 0)
2262                                         printf("Port %d can not be set back "
2263                                                         "to stopped\n", pi);
2264                                 printf("Fail to configure port %d rx queues\n",
2265                                        pi);
2266                                 /* try to reconfigure queues next time */
2267                                 port->need_reconfig_queues = 1;
2268                                 return -1;
2269                         }
2270                         /* setup hairpin queues */
2271                         if (setup_hairpin_queues(pi) != 0)
2272                                 return -1;
2273                 }
2274                 configure_rxtx_dump_callbacks(verbose_level);
2275                 if (clear_ptypes) {
2276                         diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2277                                         NULL, 0);
2278                         if (diag < 0)
2279                                 printf(
2280                                 "Port %d: Failed to disable Ptype parsing\n",
2281                                 pi);
2282                 }
2283
2284                 /* start port */
2285                 if (rte_eth_dev_start(pi) < 0) {
2286                         printf("Fail to start port %d\n", pi);
2287
2288                         /* Fail to setup rx queue, return */
2289                         if (rte_atomic16_cmpset(&(port->port_status),
2290                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2291                                 printf("Port %d can not be set back to "
2292                                                         "stopped\n", pi);
2293                         continue;
2294                 }
2295
2296                 if (rte_atomic16_cmpset(&(port->port_status),
2297                         RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2298                         printf("Port %d can not be set into started\n", pi);
2299
2300                 if (eth_macaddr_get_print_err(pi, &mac_addr) == 0)
2301                         printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi,
2302                                 mac_addr.addr_bytes[0], mac_addr.addr_bytes[1],
2303                                 mac_addr.addr_bytes[2], mac_addr.addr_bytes[3],
2304                                 mac_addr.addr_bytes[4], mac_addr.addr_bytes[5]);
2305
2306                 /* at least one port started, need checking link status */
2307                 need_check_link_status = 1;
2308         }
2309
2310         if (need_check_link_status == 1 && !no_link_check)
2311                 check_all_ports_link_status(RTE_PORT_ALL);
2312         else if (need_check_link_status == 0)
2313                 printf("Please stop the ports first\n");
2314
2315         printf("Done\n");
2316         return 0;
2317 }
2318
2319 void
2320 stop_port(portid_t pid)
2321 {
2322         portid_t pi;
2323         struct rte_port *port;
2324         int need_check_link_status = 0;
2325
2326         if (dcb_test) {
2327                 dcb_test = 0;
2328                 dcb_config = 0;
2329         }
2330
2331         if (port_id_is_invalid(pid, ENABLED_WARN))
2332                 return;
2333
2334         printf("Stopping ports...\n");
2335
2336         RTE_ETH_FOREACH_DEV(pi) {
2337                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2338                         continue;
2339
2340                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2341                         printf("Please remove port %d from forwarding configuration.\n", pi);
2342                         continue;
2343                 }
2344
2345                 if (port_is_bonding_slave(pi)) {
2346                         printf("Please remove port %d from bonded device.\n", pi);
2347                         continue;
2348                 }
2349
2350                 port = &ports[pi];
2351                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
2352                                                 RTE_PORT_HANDLING) == 0)
2353                         continue;
2354
2355                 rte_eth_dev_stop(pi);
2356
2357                 if (rte_atomic16_cmpset(&(port->port_status),
2358                         RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2359                         printf("Port %d can not be set into stopped\n", pi);
2360                 need_check_link_status = 1;
2361         }
2362         if (need_check_link_status && !no_link_check)
2363                 check_all_ports_link_status(RTE_PORT_ALL);
2364
2365         printf("Done\n");
2366 }
2367
2368 static void
2369 remove_invalid_ports_in(portid_t *array, portid_t *total)
2370 {
2371         portid_t i;
2372         portid_t new_total = 0;
2373
2374         for (i = 0; i < *total; i++)
2375                 if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2376                         array[new_total] = array[i];
2377                         new_total++;
2378                 }
2379         *total = new_total;
2380 }
2381
2382 static void
2383 remove_invalid_ports(void)
2384 {
2385         remove_invalid_ports_in(ports_ids, &nb_ports);
2386         remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
2387         nb_cfg_ports = nb_fwd_ports;
2388 }
2389
2390 void
2391 close_port(portid_t pid)
2392 {
2393         portid_t pi;
2394         struct rte_port *port;
2395
2396         if (port_id_is_invalid(pid, ENABLED_WARN))
2397                 return;
2398
2399         printf("Closing ports...\n");
2400
2401         RTE_ETH_FOREACH_DEV(pi) {
2402                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2403                         continue;
2404
2405                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2406                         printf("Please remove port %d from forwarding configuration.\n", pi);
2407                         continue;
2408                 }
2409
2410                 if (port_is_bonding_slave(pi)) {
2411                         printf("Please remove port %d from bonded device.\n", pi);
2412                         continue;
2413                 }
2414
2415                 port = &ports[pi];
2416                 if (rte_atomic16_cmpset(&(port->port_status),
2417                         RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2418                         printf("Port %d is already closed\n", pi);
2419                         continue;
2420                 }
2421
2422                 if (rte_atomic16_cmpset(&(port->port_status),
2423                         RTE_PORT_STOPPED, RTE_PORT_HANDLING) == 0) {
2424                         printf("Port %d is now not stopped\n", pi);
2425                         continue;
2426                 }
2427
2428                 if (port->flow_list)
2429                         port_flow_flush(pi);
2430                 rte_eth_dev_close(pi);
2431
2432                 remove_invalid_ports();
2433
2434                 if (rte_atomic16_cmpset(&(port->port_status),
2435                         RTE_PORT_HANDLING, RTE_PORT_CLOSED) == 0)
2436                         printf("Port %d cannot be set to closed\n", pi);
2437         }
2438
2439         printf("Done\n");
2440 }
2441
2442 void
2443 reset_port(portid_t pid)
2444 {
2445         int diag;
2446         portid_t pi;
2447         struct rte_port *port;
2448
2449         if (port_id_is_invalid(pid, ENABLED_WARN))
2450                 return;
2451
2452         if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2453                 (pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2454                 printf("Can not reset port(s), please stop port(s) first.\n");
2455                 return;
2456         }
2457
2458         printf("Resetting ports...\n");
2459
2460         RTE_ETH_FOREACH_DEV(pi) {
2461                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2462                         continue;
2463
2464                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
2465                         printf("Please remove port %d from forwarding "
2466                                "configuration.\n", pi);
2467                         continue;
2468                 }
2469
2470                 if (port_is_bonding_slave(pi)) {
2471                         printf("Please remove port %d from bonded device.\n",
2472                                pi);
2473                         continue;
2474                 }
2475
2476                 diag = rte_eth_dev_reset(pi);
2477                 if (diag == 0) {
2478                         port = &ports[pi];
2479                         port->need_reconfig = 1;
2480                         port->need_reconfig_queues = 1;
2481                 } else {
2482                         printf("Failed to reset port %d. diag=%d\n", pi, diag);
2483                 }
2484         }
2485
2486         printf("Done\n");
2487 }
2488
2489 void
2490 attach_port(char *identifier)
2491 {
2492         portid_t pi;
2493         struct rte_dev_iterator iterator;
2494
2495         printf("Attaching a new port...\n");
2496
2497         if (identifier == NULL) {
2498                 printf("Invalid parameters are specified\n");
2499                 return;
2500         }
2501
2502         if (rte_dev_probe(identifier) < 0) {
2503                 TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
2504                 return;
2505         }
2506
2507         /* first attach mode: event */
2508         if (setup_on_probe_event) {
2509                 /* new ports are detected on RTE_ETH_EVENT_NEW event */
2510                 for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
2511                         if (ports[pi].port_status == RTE_PORT_HANDLING &&
2512                                         ports[pi].need_setup != 0)
2513                                 setup_attached_port(pi);
2514                 return;
2515         }
2516
2517         /* second attach mode: iterator */
2518         RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
2519                 /* setup ports matching the devargs used for probing */
2520                 if (port_is_forwarding(pi))
2521                         continue; /* port was already attached before */
2522                 setup_attached_port(pi);
2523         }
2524 }
2525
2526 static void
2527 setup_attached_port(portid_t pi)
2528 {
2529         unsigned int socket_id;
2530         int ret;
2531
2532         socket_id = (unsigned)rte_eth_dev_socket_id(pi);
2533         /* if socket_id is invalid, set to the first available socket. */
2534         if (check_socket_id(socket_id) < 0)
2535                 socket_id = socket_ids[0];
2536         reconfig(pi, socket_id);
2537         ret = rte_eth_promiscuous_enable(pi);
2538         if (ret != 0)
2539                 printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
2540                         pi, rte_strerror(-ret));
2541
2542         ports_ids[nb_ports++] = pi;
2543         fwd_ports_ids[nb_fwd_ports++] = pi;
2544         nb_cfg_ports = nb_fwd_ports;
2545         ports[pi].need_setup = 0;
2546         ports[pi].port_status = RTE_PORT_STOPPED;
2547
2548         printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
2549         printf("Done\n");
2550 }
2551
2552 void
2553 detach_port_device(portid_t port_id)
2554 {
2555         struct rte_device *dev;
2556         portid_t sibling;
2557
2558         printf("Removing a device...\n");
2559
2560         if (port_id_is_invalid(port_id, ENABLED_WARN))
2561                 return;
2562
2563         dev = rte_eth_devices[port_id].device;
2564         if (dev == NULL) {
2565                 printf("Device already removed\n");
2566                 return;
2567         }
2568
2569         if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2570                 if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2571                         printf("Port not stopped\n");
2572                         return;
2573                 }
2574                 printf("Port was not closed\n");
2575                 if (ports[port_id].flow_list)
2576                         port_flow_flush(port_id);
2577         }
2578
2579         if (rte_dev_remove(dev) < 0) {
2580                 TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
2581                 return;
2582         }
2583         RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
2584                 /* reset mapping between old ports and removed device */
2585                 rte_eth_devices[sibling].device = NULL;
2586                 if (ports[sibling].port_status != RTE_PORT_CLOSED) {
2587                         /* sibling ports are forced to be closed */
2588                         ports[sibling].port_status = RTE_PORT_CLOSED;
2589                         printf("Port %u is closed\n", sibling);
2590                 }
2591         }
2592
2593         remove_invalid_ports();
2594
2595         printf("Device of port %u is detached\n", port_id);
2596         printf("Now total ports is %d\n", nb_ports);
2597         printf("Done\n");
2598         return;
2599 }
2600
2601 void
2602 detach_device(char *identifier)
2603 {
2604         struct rte_dev_iterator iterator;
2605         struct rte_devargs da;
2606         portid_t port_id;
2607
2608         printf("Removing a device...\n");
2609
2610         memset(&da, 0, sizeof(da));
2611         if (rte_devargs_parsef(&da, "%s", identifier)) {
2612                 printf("cannot parse identifier\n");
2613                 if (da.args)
2614                         free(da.args);
2615                 return;
2616         }
2617
2618         RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
2619                 if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2620                         if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2621                                 printf("Port %u not stopped\n", port_id);
2622                                 rte_eth_iterator_cleanup(&iterator);
2623                                 return;
2624                         }
2625
2626                         /* sibling ports are forced to be closed */
2627                         if (ports[port_id].flow_list)
2628                                 port_flow_flush(port_id);
2629                         ports[port_id].port_status = RTE_PORT_CLOSED;
2630                         printf("Port %u is now closed\n", port_id);
2631                 }
2632         }
2633
2634         if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
2635                 TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
2636                             da.name, da.bus->name);
2637                 return;
2638         }
2639
2640         remove_invalid_ports();
2641
2642         printf("Device %s is detached\n", identifier);
2643         printf("Now total ports is %d\n", nb_ports);
2644         printf("Done\n");
2645 }
2646
2647 void
2648 pmd_test_exit(void)
2649 {
2650         portid_t pt_id;
2651         int ret;
2652         int i;
2653
2654         if (test_done == 0)
2655                 stop_packet_forwarding();
2656
2657         for (i = 0 ; i < RTE_MAX_NUMA_NODES ; i++) {
2658                 if (mempools[i]) {
2659                         if (mp_alloc_type == MP_ALLOC_ANON)
2660                                 rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
2661                                                      NULL);
2662                 }
2663         }
2664         if (ports != NULL) {
2665                 no_link_check = 1;
2666                 RTE_ETH_FOREACH_DEV(pt_id) {
2667                         printf("\nStopping port %d...\n", pt_id);
2668                         fflush(stdout);
2669                         stop_port(pt_id);
2670                 }
2671                 RTE_ETH_FOREACH_DEV(pt_id) {
2672                         printf("\nShutting down port %d...\n", pt_id);
2673                         fflush(stdout);
2674                         close_port(pt_id);
2675                 }
2676         }
2677
2678         if (hot_plug) {
2679                 ret = rte_dev_event_monitor_stop();
2680                 if (ret) {
2681                         RTE_LOG(ERR, EAL,
2682                                 "fail to stop device event monitor.");
2683                         return;
2684                 }
2685
2686                 ret = rte_dev_event_callback_unregister(NULL,
2687                         dev_event_callback, NULL);
2688                 if (ret < 0) {
2689                         RTE_LOG(ERR, EAL,
2690                                 "fail to unregister device event callback.\n");
2691                         return;
2692                 }
2693
2694                 ret = rte_dev_hotplug_handle_disable();
2695                 if (ret) {
2696                         RTE_LOG(ERR, EAL,
2697                                 "fail to disable hotplug handling.\n");
2698                         return;
2699                 }
2700         }
2701         for (i = 0 ; i < RTE_MAX_NUMA_NODES ; i++) {
2702                 if (mempools[i])
2703                         rte_mempool_free(mempools[i]);
2704         }
2705
2706         printf("\nBye...\n");
2707 }
2708
/* Pointer to a function that takes no argument and returns nothing. */
typedef void (*cmd_func_t)(void);
/* Pairs a command name with the function bound to it. */
struct pmd_test_command {
	const char *cmd_name; /* name of the command */
	cmd_func_t cmd_func;  /* function associated with cmd_name */
};

/*
 * Element count of the pmd_test_menu array.
 * NOTE(review): pmd_test_menu is not defined in the visible part of this
 * file - confirm it still exists before using this macro.
 */
#define PMD_TEST_CMD_NB (sizeof(pmd_test_menu) / sizeof(pmd_test_menu[0]))
2716
2717 /* Check the link status of all ports in up to 9s, and print them finally */
2718 static void
2719 check_all_ports_link_status(uint32_t port_mask)
2720 {
2721 #define CHECK_INTERVAL 100 /* 100ms */
2722 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
2723         portid_t portid;
2724         uint8_t count, all_ports_up, print_flag = 0;
2725         struct rte_eth_link link;
2726         int ret;
2727
2728         printf("Checking link statuses...\n");
2729         fflush(stdout);
2730         for (count = 0; count <= MAX_CHECK_TIME; count++) {
2731                 all_ports_up = 1;
2732                 RTE_ETH_FOREACH_DEV(portid) {
2733                         if ((port_mask & (1 << portid)) == 0)
2734                                 continue;
2735                         memset(&link, 0, sizeof(link));
2736                         ret = rte_eth_link_get_nowait(portid, &link);
2737                         if (ret < 0) {
2738                                 all_ports_up = 0;
2739                                 if (print_flag == 1)
2740                                         printf("Port %u link get failed: %s\n",
2741                                                 portid, rte_strerror(-ret));
2742                                 continue;
2743                         }
2744                         /* print link status if flag set */
2745                         if (print_flag == 1) {
2746                                 if (link.link_status)
2747                                         printf(
2748                                         "Port%d Link Up. speed %u Mbps- %s\n",
2749                                         portid, link.link_speed,
2750                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
2751                                         ("full-duplex") : ("half-duplex\n"));
2752                                 else
2753                                         printf("Port %d Link Down\n", portid);
2754                                 continue;
2755                         }
2756                         /* clear all_ports_up flag if any link down */
2757                         if (link.link_status == ETH_LINK_DOWN) {
2758                                 all_ports_up = 0;
2759                                 break;
2760                         }
2761                 }
2762                 /* after finally printing all link status, get out */
2763                 if (print_flag == 1)
2764                         break;
2765
2766                 if (all_ports_up == 0) {
2767                         fflush(stdout);
2768                         rte_delay_ms(CHECK_INTERVAL);
2769                 }
2770
2771                 /* set the print_flag if all ports up or timeout */
2772                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
2773                         print_flag = 1;
2774                 }
2775
2776                 if (lsc_interrupt)
2777                         break;
2778         }
2779 }
2780
2781 /*
2782  * This callback is for remove a port for a device. It has limitation because
2783  * it is not for multiple port removal for a device.
2784  * TODO: the device detach invoke will plan to be removed from user side to
2785  * eal. And convert all PMDs to free port resources on ether device closing.
2786  */
2787 static void
2788 rmv_port_callback(void *arg)
2789 {
2790         int need_to_start = 0;
2791         int org_no_link_check = no_link_check;
2792         portid_t port_id = (intptr_t)arg;
2793
2794         RTE_ETH_VALID_PORTID_OR_RET(port_id);
2795
2796         if (!test_done && port_is_forwarding(port_id)) {
2797                 need_to_start = 1;
2798                 stop_packet_forwarding();
2799         }
2800         no_link_check = 1;
2801         stop_port(port_id);
2802         no_link_check = org_no_link_check;
2803         close_port(port_id);
2804         detach_port_device(port_id);
2805         if (need_to_start)
2806                 start_packet_forwarding(0);
2807 }
2808
2809 /* This function is used by the interrupt thread */
2810 static int
2811 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
2812                   void *ret_param)
2813 {
2814         RTE_SET_USED(param);
2815         RTE_SET_USED(ret_param);
2816
2817         if (type >= RTE_ETH_EVENT_MAX) {
2818                 fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
2819                         port_id, __func__, type);
2820                 fflush(stderr);
2821         } else if (event_print_mask & (UINT32_C(1) << type)) {
2822                 printf("\nPort %" PRIu16 ": %s event\n", port_id,
2823                         eth_event_desc[type]);
2824                 fflush(stdout);
2825         }
2826
2827         switch (type) {
2828         case RTE_ETH_EVENT_NEW:
2829                 ports[port_id].need_setup = 1;
2830                 ports[port_id].port_status = RTE_PORT_HANDLING;
2831                 break;
2832         case RTE_ETH_EVENT_INTR_RMV:
2833                 if (port_id_is_invalid(port_id, DISABLED_WARN))
2834                         break;
2835                 if (rte_eal_alarm_set(100000,
2836                                 rmv_port_callback, (void *)(intptr_t)port_id))
2837                         fprintf(stderr, "Could not set up deferred device removal\n");
2838                 break;
2839         default:
2840                 break;
2841         }
2842         return 0;
2843 }
2844
2845 static int
2846 register_eth_event_callback(void)
2847 {
2848         int ret;
2849         enum rte_eth_event_type event;
2850
2851         for (event = RTE_ETH_EVENT_UNKNOWN;
2852                         event < RTE_ETH_EVENT_MAX; event++) {
2853                 ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
2854                                 event,
2855                                 eth_event_callback,
2856                                 NULL);
2857                 if (ret != 0) {
2858                         TESTPMD_LOG(ERR, "Failed to register callback for "
2859                                         "%s event\n", eth_event_desc[event]);
2860                         return -1;
2861                 }
2862         }
2863
2864         return 0;
2865 }
2866
2867 /* This function is used by the interrupt thread */
2868 static void
2869 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
2870                              __rte_unused void *arg)
2871 {
2872         uint16_t port_id;
2873         int ret;
2874
2875         if (type >= RTE_DEV_EVENT_MAX) {
2876                 fprintf(stderr, "%s called upon invalid event %d\n",
2877                         __func__, type);
2878                 fflush(stderr);
2879         }
2880
2881         switch (type) {
2882         case RTE_DEV_EVENT_REMOVE:
2883                 RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
2884                         device_name);
2885                 ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
2886                 if (ret) {
2887                         RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
2888                                 device_name);
2889                         return;
2890                 }
2891                 /*
2892                  * Because the user's callback is invoked in eal interrupt
2893                  * callback, the interrupt callback need to be finished before
2894                  * it can be unregistered when detaching device. So finish
2895                  * callback soon and use a deferred removal to detach device
2896                  * is need. It is a workaround, once the device detaching be
2897                  * moved into the eal in the future, the deferred removal could
2898                  * be deleted.
2899                  */
2900                 if (rte_eal_alarm_set(100000,
2901                                 rmv_port_callback, (void *)(intptr_t)port_id))
2902                         RTE_LOG(ERR, EAL,
2903                                 "Could not set up deferred device removal\n");
2904                 break;
2905         case RTE_DEV_EVENT_ADD:
2906                 RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
2907                         device_name);
2908                 /* TODO: After finish kernel driver binding,
2909                  * begin to attach port.
2910                  */
2911                 break;
2912         default:
2913                 break;
2914         }
2915 }
2916
2917 static int
2918 set_tx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
2919 {
2920         uint16_t i;
2921         int diag;
2922         uint8_t mapping_found = 0;
2923
2924         for (i = 0; i < nb_tx_queue_stats_mappings; i++) {
2925                 if ((tx_queue_stats_mappings[i].port_id == port_id) &&
2926                                 (tx_queue_stats_mappings[i].queue_id < nb_txq )) {
2927                         diag = rte_eth_dev_set_tx_queue_stats_mapping(port_id,
2928                                         tx_queue_stats_mappings[i].queue_id,
2929                                         tx_queue_stats_mappings[i].stats_counter_id);
2930                         if (diag != 0)
2931                                 return diag;
2932                         mapping_found = 1;
2933                 }
2934         }
2935         if (mapping_found)
2936                 port->tx_queue_stats_mapping_enabled = 1;
2937         return 0;
2938 }
2939
2940 static int
2941 set_rx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
2942 {
2943         uint16_t i;
2944         int diag;
2945         uint8_t mapping_found = 0;
2946
2947         for (i = 0; i < nb_rx_queue_stats_mappings; i++) {
2948                 if ((rx_queue_stats_mappings[i].port_id == port_id) &&
2949                                 (rx_queue_stats_mappings[i].queue_id < nb_rxq )) {
2950                         diag = rte_eth_dev_set_rx_queue_stats_mapping(port_id,
2951                                         rx_queue_stats_mappings[i].queue_id,
2952                                         rx_queue_stats_mappings[i].stats_counter_id);
2953                         if (diag != 0)
2954                                 return diag;
2955                         mapping_found = 1;
2956                 }
2957         }
2958         if (mapping_found)
2959                 port->rx_queue_stats_mapping_enabled = 1;
2960         return 0;
2961 }
2962
2963 static void
2964 map_port_queue_stats_mapping_registers(portid_t pi, struct rte_port *port)
2965 {
2966         int diag = 0;
2967
2968         diag = set_tx_queue_stats_mapping_registers(pi, port);
2969         if (diag != 0) {
2970                 if (diag == -ENOTSUP) {
2971                         port->tx_queue_stats_mapping_enabled = 0;
2972                         printf("TX queue stats mapping not supported port id=%d\n", pi);
2973                 }
2974                 else
2975                         rte_exit(EXIT_FAILURE,
2976                                         "set_tx_queue_stats_mapping_registers "
2977                                         "failed for port id=%d diag=%d\n",
2978                                         pi, diag);
2979         }
2980
2981         diag = set_rx_queue_stats_mapping_registers(pi, port);
2982         if (diag != 0) {
2983                 if (diag == -ENOTSUP) {
2984                         port->rx_queue_stats_mapping_enabled = 0;
2985                         printf("RX queue stats mapping not supported port id=%d\n", pi);
2986                 }
2987                 else
2988                         rte_exit(EXIT_FAILURE,
2989                                         "set_rx_queue_stats_mapping_registers "
2990                                         "failed for port id=%d diag=%d\n",
2991                                         pi, diag);
2992         }
2993 }
2994
2995 static void
2996 rxtx_port_config(struct rte_port *port)
2997 {
2998         uint16_t qid;
2999         uint64_t offloads;
3000
3001         for (qid = 0; qid < nb_rxq; qid++) {
3002                 offloads = port->rx_conf[qid].offloads;
3003                 port->rx_conf[qid] = port->dev_info.default_rxconf;
3004                 if (offloads != 0)
3005                         port->rx_conf[qid].offloads = offloads;
3006
3007                 /* Check if any Rx parameters have been passed */
3008                 if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3009                         port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3010
3011                 if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3012                         port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3013
3014                 if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3015                         port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3016
3017                 if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3018                         port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3019
3020                 if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3021                         port->rx_conf[qid].rx_drop_en = rx_drop_en;
3022
3023                 port->nb_rx_desc[qid] = nb_rxd;
3024         }
3025
3026         for (qid = 0; qid < nb_txq; qid++) {
3027                 offloads = port->tx_conf[qid].offloads;
3028                 port->tx_conf[qid] = port->dev_info.default_txconf;
3029                 if (offloads != 0)
3030                         port->tx_conf[qid].offloads = offloads;
3031
3032                 /* Check if any Tx parameters have been passed */
3033                 if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3034                         port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3035
3036                 if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3037                         port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3038
3039                 if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3040                         port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3041
3042                 if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3043                         port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3044
3045                 if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3046                         port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3047
3048                 port->nb_tx_desc[qid] = nb_txd;
3049         }
3050 }
3051
3052 void
3053 init_port_config(void)
3054 {
3055         portid_t pid;
3056         struct rte_port *port;
3057         int ret;
3058
3059         RTE_ETH_FOREACH_DEV(pid) {
3060                 port = &ports[pid];
3061                 port->dev_conf.fdir_conf = fdir_conf;
3062
3063                 ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3064                 if (ret != 0)
3065                         return;
3066
3067                 if (nb_rxq > 1) {
3068                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3069                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3070                                 rss_hf & port->dev_info.flow_type_rss_offloads;
3071                 } else {
3072                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3073                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3074                 }
3075
3076                 if (port->dcb_flag == 0) {
3077                         if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
3078                                 port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
3079                         else
3080                                 port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3081                 }
3082
3083                 rxtx_port_config(port);
3084
3085                 ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3086                 if (ret != 0)
3087                         return;
3088
3089                 map_port_queue_stats_mapping_registers(pid, port);
3090 #if defined RTE_LIBRTE_IXGBE_PMD && defined RTE_LIBRTE_IXGBE_BYPASS
3091                 rte_pmd_ixgbe_bypass_init(pid);
3092 #endif
3093
3094                 if (lsc_interrupt &&
3095                     (rte_eth_devices[pid].data->dev_flags &
3096                      RTE_ETH_DEV_INTR_LSC))
3097                         port->dev_conf.intr_conf.lsc = 1;
3098                 if (rmv_interrupt &&
3099                     (rte_eth_devices[pid].data->dev_flags &
3100                      RTE_ETH_DEV_INTR_RMV))
3101                         port->dev_conf.intr_conf.rmv = 1;
3102         }
3103 }
3104
3105 void set_port_slave_flag(portid_t slave_pid)
3106 {
3107         struct rte_port *port;
3108
3109         port = &ports[slave_pid];
3110         port->slave_flag = 1;
3111 }
3112
3113 void clear_port_slave_flag(portid_t slave_pid)
3114 {
3115         struct rte_port *port;
3116
3117         port = &ports[slave_pid];
3118         port->slave_flag = 0;
3119 }
3120
3121 uint8_t port_is_bonding_slave(portid_t slave_pid)
3122 {
3123         struct rte_port *port;
3124
3125         port = &ports[slave_pid];
3126         if ((rte_eth_devices[slave_pid].data->dev_flags &
3127             RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3128                 return 1;
3129         return 0;
3130 }
3131
/* 32 consecutive VLAN ids (0..31), one entry per possible VMDq pool map. */
const uint16_t vlan_tags[] = {
	 0,  1,  2,  3,  4,  5,  6,  7,
	 8,  9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31
};
3138
3139 static  int
3140 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3141                  enum dcb_mode_enable dcb_mode,
3142                  enum rte_eth_nb_tcs num_tcs,
3143                  uint8_t pfc_en)
3144 {
3145         uint8_t i;
3146         int32_t rc;
3147         struct rte_eth_rss_conf rss_conf;
3148
3149         /*
3150          * Builds up the correct configuration for dcb+vt based on the vlan tags array
3151          * given above, and the number of traffic classes available for use.
3152          */
3153         if (dcb_mode == DCB_VT_ENABLED) {
3154                 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3155                                 &eth_conf->rx_adv_conf.vmdq_dcb_conf;
3156                 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3157                                 &eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3158
3159                 /* VMDQ+DCB RX and TX configurations */
3160                 vmdq_rx_conf->enable_default_pool = 0;
3161                 vmdq_rx_conf->default_pool = 0;
3162                 vmdq_rx_conf->nb_queue_pools =
3163                         (num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3164                 vmdq_tx_conf->nb_queue_pools =
3165                         (num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3166
3167                 vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3168                 for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3169                         vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3170                         vmdq_rx_conf->pool_map[i].pools =
3171                                 1 << (i % vmdq_rx_conf->nb_queue_pools);
3172                 }
3173                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3174                         vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3175                         vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3176                 }
3177
3178                 /* set DCB mode of RX and TX of multiple queues */
3179                 eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB;
3180                 eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3181         } else {
3182                 struct rte_eth_dcb_rx_conf *rx_conf =
3183                                 &eth_conf->rx_adv_conf.dcb_rx_conf;
3184                 struct rte_eth_dcb_tx_conf *tx_conf =
3185                                 &eth_conf->tx_adv_conf.dcb_tx_conf;
3186
3187                 rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3188                 if (rc != 0)
3189                         return rc;
3190
3191                 rx_conf->nb_tcs = num_tcs;
3192                 tx_conf->nb_tcs = num_tcs;
3193
3194                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3195                         rx_conf->dcb_tc[i] = i % num_tcs;
3196                         tx_conf->dcb_tc[i] = i % num_tcs;
3197                 }
3198
3199                 eth_conf->rxmode.mq_mode = ETH_MQ_RX_DCB_RSS;
3200                 eth_conf->rx_adv_conf.rss_conf = rss_conf;
3201                 eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3202         }
3203
3204         if (pfc_en)
3205                 eth_conf->dcb_capability_en =
3206                                 ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3207         else
3208                 eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3209
3210         return 0;
3211 }
3212
3213 int
3214 init_port_dcb_config(portid_t pid,
3215                      enum dcb_mode_enable dcb_mode,
3216                      enum rte_eth_nb_tcs num_tcs,
3217                      uint8_t pfc_en)
3218 {
3219         struct rte_eth_conf port_conf;
3220         struct rte_port *rte_port;
3221         int retval;
3222         uint16_t i;
3223
3224         rte_port = &ports[pid];
3225
3226         memset(&port_conf, 0, sizeof(struct rte_eth_conf));
3227         /* Enter DCB configuration status */
3228         dcb_config = 1;
3229
3230         port_conf.rxmode = rte_port->dev_conf.rxmode;
3231         port_conf.txmode = rte_port->dev_conf.txmode;
3232
3233         /*set configuration of DCB in vt mode and DCB in non-vt mode*/
3234         retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3235         if (retval < 0)
3236                 return retval;
3237         port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3238
3239         /* re-configure the device . */
3240         retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3241         if (retval < 0)
3242                 return retval;
3243
3244         retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3245         if (retval != 0)
3246                 return retval;
3247
3248         /* If dev_info.vmdq_pool_base is greater than 0,
3249          * the queue id of vmdq pools is started after pf queues.
3250          */
3251         if (dcb_mode == DCB_VT_ENABLED &&
3252             rte_port->dev_info.vmdq_pool_base > 0) {
3253                 printf("VMDQ_DCB multi-queue mode is nonsensical"
3254                         " for port %d.", pid);
3255                 return -1;
3256         }
3257
3258         /* Assume the ports in testpmd have the same dcb capability
3259          * and has the same number of rxq and txq in dcb mode
3260          */
3261         if (dcb_mode == DCB_VT_ENABLED) {
3262                 if (rte_port->dev_info.max_vfs > 0) {
3263                         nb_rxq = rte_port->dev_info.nb_rx_queues;
3264                         nb_txq = rte_port->dev_info.nb_tx_queues;
3265                 } else {
3266                         nb_rxq = rte_port->dev_info.max_rx_queues;
3267                         nb_txq = rte_port->dev_info.max_tx_queues;
3268                 }
3269         } else {
3270                 /*if vt is disabled, use all pf queues */
3271                 if (rte_port->dev_info.vmdq_pool_base == 0) {
3272                         nb_rxq = rte_port->dev_info.max_rx_queues;
3273                         nb_txq = rte_port->dev_info.max_tx_queues;
3274                 } else {
3275                         nb_rxq = (queueid_t)num_tcs;
3276                         nb_txq = (queueid_t)num_tcs;
3277
3278                 }
3279         }
3280         rx_free_thresh = 64;
3281
3282         memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3283
3284         rxtx_port_config(rte_port);
3285         /* VLAN filter */
3286         rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3287         for (i = 0; i < RTE_DIM(vlan_tags); i++)
3288                 rx_vft_set(pid, vlan_tags[i], 1);
3289
3290         retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3291         if (retval != 0)
3292                 return retval;
3293
3294         map_port_queue_stats_mapping_registers(pid, rte_port);
3295
3296         rte_port->dcb_flag = 1;
3297
3298         return 0;
3299 }
3300
3301 static void
3302 init_port(void)
3303 {
3304         /* Configuration of Ethernet ports. */
3305         ports = rte_zmalloc("testpmd: ports",
3306                             sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3307                             RTE_CACHE_LINE_SIZE);
3308         if (ports == NULL) {
3309                 rte_exit(EXIT_FAILURE,
3310                                 "rte_zmalloc(%d struct rte_port) failed\n",
3311                                 RTE_MAX_ETHPORTS);
3312         }
3313
3314         /* Initialize ports NUMA structures */
3315         memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3316         memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3317         memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3318 }
3319
/*
 * Forced shutdown helper: runs the testpmd exit routine, then asks the
 * prompt to exit.
 */
static void
force_quit(void)
{
	pmd_test_exit();
	prompt_exit();
}
3326
3327 static void
3328 print_stats(void)
3329 {
3330         uint8_t i;
3331         const char clr[] = { 27, '[', '2', 'J', '\0' };
3332         const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3333
3334         /* Clear screen and move to top left */
3335         printf("%s%s", clr, top_left);
3336
3337         printf("\nPort statistics ====================================");
3338         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3339                 nic_stats_display(fwd_ports_ids[i]);
3340
3341         fflush(stdout);
3342 }
3343
3344 static void
3345 signal_handler(int signum)
3346 {
3347         if (signum == SIGINT || signum == SIGTERM) {
3348                 printf("\nSignal %d received, preparing to exit...\n",
3349                                 signum);
3350 #ifdef RTE_LIBRTE_PDUMP
3351                 /* uninitialize packet capture framework */
3352                 rte_pdump_uninit();
3353 #endif
3354 #ifdef RTE_LIBRTE_LATENCY_STATS
3355                 if (latencystats_enabled != 0)
3356                         rte_latencystats_uninit();
3357 #endif
3358                 force_quit();
3359                 /* Set flag to indicate the force termination. */
3360                 f_quit = 1;
3361                 /* exit with the expected status */
3362                 signal(signum, SIG_DFL);
3363                 kill(getpid(), signum);
3364         }
3365 }
3366
3367 int
3368 main(int argc, char** argv)
3369 {
3370         int diag;
3371         portid_t port_id;
3372         uint16_t count;
3373         int ret;
3374
3375         signal(SIGINT, signal_handler);
3376         signal(SIGTERM, signal_handler);
3377
3378         testpmd_logtype = rte_log_register("testpmd");
3379         if (testpmd_logtype < 0)
3380                 rte_exit(EXIT_FAILURE, "Cannot register log type");
3381         rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
3382
3383         diag = rte_eal_init(argc, argv);
3384         if (diag < 0)
3385                 rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
3386                          rte_strerror(rte_errno));
3387
3388         if (rte_eal_process_type() == RTE_PROC_SECONDARY)
3389                 rte_exit(EXIT_FAILURE,
3390                          "Secondary process type not supported.\n");
3391
3392         ret = register_eth_event_callback();
3393         if (ret != 0)
3394                 rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
3395
3396 #ifdef RTE_LIBRTE_PDUMP
3397         /* initialize packet capture framework */
3398         rte_pdump_init();
3399 #endif
3400
3401         count = 0;
3402         RTE_ETH_FOREACH_DEV(port_id) {
3403                 ports_ids[count] = port_id;
3404                 count++;
3405         }
3406         nb_ports = (portid_t) count;
3407         if (nb_ports == 0)
3408                 TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
3409
3410         /* allocate port structures, and init them */
3411         init_port();
3412
3413         set_def_fwd_config();
3414         if (nb_lcores == 0)
3415                 rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
3416                          "Check the core mask argument\n");
3417
3418         /* Bitrate/latency stats disabled by default */
3419 #ifdef RTE_LIBRTE_BITRATE
3420         bitrate_enabled = 0;
3421 #endif
3422 #ifdef RTE_LIBRTE_LATENCY_STATS
3423         latencystats_enabled = 0;
3424 #endif
3425
3426         /* on FreeBSD, mlockall() is disabled by default */
3427 #ifdef RTE_EXEC_ENV_FREEBSD
3428         do_mlockall = 0;
3429 #else
3430         do_mlockall = 1;
3431 #endif
3432
3433         argc -= diag;
3434         argv += diag;
3435         if (argc > 1)
3436                 launch_args_parse(argc, argv);
3437
3438         if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
3439                 TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
3440                         strerror(errno));
3441         }
3442
3443         if (tx_first && interactive)
3444                 rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
3445                                 "interactive mode.\n");
3446
3447         if (tx_first && lsc_interrupt) {
3448                 printf("Warning: lsc_interrupt needs to be off when "
3449                                 " using tx_first. Disabling.\n");
3450                 lsc_interrupt = 0;
3451         }
3452
3453         if (!nb_rxq && !nb_txq)
3454                 printf("Warning: Either rx or tx queues should be non-zero\n");
3455
3456         if (nb_rxq > 1 && nb_rxq > nb_txq)
3457                 printf("Warning: nb_rxq=%d enables RSS configuration, "
3458                        "but nb_txq=%d will prevent to fully test it.\n",
3459                        nb_rxq, nb_txq);
3460
3461         init_config();
3462
3463         if (hot_plug) {
3464                 ret = rte_dev_hotplug_handle_enable();
3465                 if (ret) {
3466                         RTE_LOG(ERR, EAL,
3467                                 "fail to enable hotplug handling.");
3468                         return -1;
3469                 }
3470
3471                 ret = rte_dev_event_monitor_start();
3472                 if (ret) {
3473                         RTE_LOG(ERR, EAL,
3474                                 "fail to start device event monitoring.");
3475                         return -1;
3476                 }
3477
3478                 ret = rte_dev_event_callback_register(NULL,
3479                         dev_event_callback, NULL);
3480                 if (ret) {
3481                         RTE_LOG(ERR, EAL,
3482                                 "fail  to register device event callback\n");
3483                         return -1;
3484                 }
3485         }
3486
3487         if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
3488                 rte_exit(EXIT_FAILURE, "Start ports failed\n");
3489
3490         /* set all ports to promiscuous mode by default */
3491         RTE_ETH_FOREACH_DEV(port_id) {
3492                 ret = rte_eth_promiscuous_enable(port_id);
3493                 if (ret != 0)
3494                         printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
3495                                 port_id, rte_strerror(-ret));
3496         }
3497
3498         /* Init metrics library */
3499         rte_metrics_init(rte_socket_id());
3500
3501 #ifdef RTE_LIBRTE_LATENCY_STATS
3502         if (latencystats_enabled != 0) {
3503                 int ret = rte_latencystats_init(1, NULL);
3504                 if (ret)
3505                         printf("Warning: latencystats init()"
3506                                 " returned error %d\n", ret);
3507                 printf("Latencystats running on lcore %d\n",
3508                         latencystats_lcore_id);
3509         }
3510 #endif
3511
3512         /* Setup bitrate stats */
3513 #ifdef RTE_LIBRTE_BITRATE
3514         if (bitrate_enabled != 0) {
3515                 bitrate_data = rte_stats_bitrate_create();
3516                 if (bitrate_data == NULL)
3517                         rte_exit(EXIT_FAILURE,
3518                                 "Could not allocate bitrate data.\n");
3519                 rte_stats_bitrate_reg(bitrate_data);
3520         }
3521 #endif
3522
3523 #ifdef RTE_LIBRTE_CMDLINE
3524         if (strlen(cmdline_filename) != 0)
3525                 cmdline_read_from_file(cmdline_filename);
3526
3527         if (interactive == 1) {
3528                 if (auto_start) {
3529                         printf("Start automatic packet forwarding\n");
3530                         start_packet_forwarding(0);
3531                 }
3532                 prompt();
3533                 pmd_test_exit();
3534         } else
3535 #endif
3536         {
3537                 char c;
3538                 int rc;
3539
3540                 f_quit = 0;
3541
3542                 printf("No commandline core given, start packet forwarding\n");
3543                 start_packet_forwarding(tx_first);
3544                 if (stats_period != 0) {
3545                         uint64_t prev_time = 0, cur_time, diff_time = 0;
3546                         uint64_t timer_period;
3547
3548                         /* Convert to number of cycles */
3549                         timer_period = stats_period * rte_get_timer_hz();
3550
3551                         while (f_quit == 0) {
3552                                 cur_time = rte_get_timer_cycles();
3553                                 diff_time += cur_time - prev_time;
3554
3555                                 if (diff_time >= timer_period) {
3556                                         print_stats();
3557                                         /* Reset the timer */
3558                                         diff_time = 0;
3559                                 }
3560                                 /* Sleep to avoid unnecessary checks */
3561                                 prev_time = cur_time;
3562                                 sleep(1);
3563                         }
3564                 }
3565
3566                 printf("Press enter to exit\n");
3567                 rc = read(0, &c, 1);
3568                 pmd_test_exit();
3569                 if (rc < 0)
3570                         return 1;
3571         }
3572
3573         return 0;
3574 }