ethdev: fix max Rx packet length
[dpdk.git] / app/test-pmd/testpmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_atomic.h>
40 #include <rte_branch_prediction.h>
41 #include <rte_mempool.h>
42 #include <rte_malloc.h>
43 #include <rte_mbuf.h>
44 #include <rte_mbuf_pool_ops.h>
45 #include <rte_interrupts.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_dev.h>
50 #include <rte_string_fns.h>
51 #ifdef RTE_NET_IXGBE
52 #include <rte_pmd_ixgbe.h>
53 #endif
54 #ifdef RTE_LIB_PDUMP
55 #include <rte_pdump.h>
56 #endif
57 #include <rte_flow.h>
58 #include <rte_metrics.h>
59 #ifdef RTE_LIB_BITRATESTATS
60 #include <rte_bitrate.h>
61 #endif
62 #ifdef RTE_LIB_LATENCYSTATS
63 #include <rte_latencystats.h>
64 #endif
65 #ifdef RTE_EXEC_ENV_WINDOWS
66 #include <process.h>
67 #endif
68
69 #include "testpmd.h"
70
71 #ifndef MAP_HUGETLB
72 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
73 #define HUGE_FLAG (0x40000)
74 #else
75 #define HUGE_FLAG MAP_HUGETLB
76 #endif
77
78 #ifndef MAP_HUGE_SHIFT
79 /* older kernels (or FreeBSD) will not have this define */
80 #define HUGE_SHIFT (26)
81 #else
82 #define HUGE_SHIFT MAP_HUGE_SHIFT
83 #endif
84
85 #define EXTMEM_HEAP_NAME "extmem"
86 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
87
88 uint16_t verbose_level = 0; /**< Silent by default. */
89 int testpmd_logtype; /**< Log type for testpmd logs */
90
91 /* use main core for command line? */
92 uint8_t interactive = 0;
93 uint8_t auto_start = 0;
94 uint8_t tx_first;
95 char cmdline_filename[PATH_MAX] = {0};
96
97 /*
98  * NUMA support configuration.
99  * When set, the NUMA support attempts to dispatch the allocation of the
100  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
101  * probed ports among the CPU sockets 0 and 1.
102  * Otherwise, all memory is allocated from CPU socket 0.
103  */
104 uint8_t numa_support = 1; /**< numa enabled by default */
105
106 /*
107  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
108  * not configured.
109  */
110 uint8_t socket_num = UMA_NO_CONFIG;
111
112 /*
113  * Select mempool allocation type:
114  * - native: use regular DPDK memory
115  * - anon: use regular DPDK memory to create mempool, but populate using
116  *         anonymous memory (may not be IOVA-contiguous)
117  * - xmem: use externally allocated hugepage memory
118  */
119 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
120
121 /*
122  * Store the specified sockets on which the memory pools used by the
123  * ports are allocated.
124  */
125 uint8_t port_numa[RTE_MAX_ETHPORTS];
126
127 /*
128  * Store the specified sockets on which the RX rings used by the
129  * ports are allocated.
130  */
131 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
132
133 /*
134  * Store the specified sockets on which the TX rings used by the
135  * ports are allocated.
136  */
137 uint8_t txring_numa[RTE_MAX_ETHPORTS];
138
139 /*
140  * Record the Ethernet address of peer target ports to which packets are
141  * forwarded.
142  * Must be instantiated with the Ethernet addresses of peer traffic generator
143  * ports.
144  */
145 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
146 portid_t nb_peer_eth_addrs = 0;
147
148 /*
149  * Probed Target Environment.
150  */
151 struct rte_port *ports;        /**< For all probed ethernet ports. */
152 portid_t nb_ports;             /**< Number of probed ethernet ports. */
153 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
154 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
155
156 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
157
158 /*
159  * Test Forwarding Configuration.
160  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
161  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
162  */
163 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
164 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
165 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
166 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
167
168 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
169 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
170
171 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
172 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
173
174 /*
175  * Forwarding engines.
176  */
177 struct fwd_engine * fwd_engines[] = {
178         &io_fwd_engine,
179         &mac_fwd_engine,
180         &mac_swap_engine,
181         &flow_gen_engine,
182         &rx_only_engine,
183         &tx_only_engine,
184         &csum_fwd_engine,
185         &icmp_echo_engine,
186         &noisy_vnf_engine,
187         &five_tuple_swap_fwd_engine,
188 #ifdef RTE_LIBRTE_IEEE1588
189         &ieee1588_fwd_engine,
190 #endif
191         NULL,
192 };
193
194 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
195 uint16_t mempool_flags;
196
197 struct fwd_config cur_fwd_config;
198 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
199 uint32_t retry_enabled;
200 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
201 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
202
203 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
204 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
205         DEFAULT_MBUF_DATA_SIZE
206 }; /**< Mbuf data space size. */
207 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
208                                       * specified on command-line. */
209 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
210
211 /** Extended statistics to show. */
212 struct rte_eth_xstat_name *xstats_display;
213
214 unsigned int xstats_display_num; /**< Size of extended statistics to show */
215
216 /*
217  * In a container, the process running with the 'stats-period' option cannot be
218  * terminated. Set a flag to exit the stats-period loop when SIGINT/SIGTERM is received.
219  */
220 uint8_t f_quit;
221
222 /*
223  * Max Rx frame size, set by '--max-pkt-len' parameter.
224  */
225 uint32_t max_rx_pkt_len;
226
227 /*
228  * Configuration of packet segments used to scatter received packets
229  * if one of the split features is configured.
230  */
231 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
232 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
233 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
234 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
235
236 /*
237  * Configuration of packet segments used by the "txonly" processing engine.
238  */
239 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
240 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
241         TXONLY_DEF_PACKET_LEN,
242 };
243 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
244
245 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
246 /**< Split policy for packets to TX. */
247
248 uint8_t txonly_multi_flow;
249 /**< Whether multiple flows are generated in TXONLY mode. */
250
251 uint32_t tx_pkt_times_inter;
252 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
253
254 uint32_t tx_pkt_times_intra;
255 /**< Timings for send scheduling in TXONLY mode, time between packets. */
256
257 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
258 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
259 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
260 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
261
262 /* Whether the current configuration is in DCB mode; 0 means it is not */
263 uint8_t dcb_config = 0;
264
265 /*
266  * Configurable number of RX/TX queues.
267  */
268 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
269 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
270 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
271
272 /*
273  * Configurable number of RX/TX ring descriptors.
274  * Defaults are supplied by drivers via ethdev.
275  */
276 #define RTE_TEST_RX_DESC_DEFAULT 0
277 #define RTE_TEST_TX_DESC_DEFAULT 0
278 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
279 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
280
281 #define RTE_PMD_PARAM_UNSET -1
282 /*
283  * Configurable values of RX and TX ring threshold registers.
284  */
285
286 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
287 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
288 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
289
290 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
291 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
292 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
293
294 /*
295  * Configurable value of RX free threshold.
296  */
297 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
298
299 /*
300  * Configurable value of RX drop enable.
301  */
302 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
303
304 /*
305  * Configurable value of TX free threshold.
306  */
307 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
308
309 /*
310  * Configurable value of TX RS bit threshold.
311  */
312 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
313
314 /*
315  * Configurable value of buffered packets before sending.
316  */
317 uint16_t noisy_tx_sw_bufsz;
318
319 /*
320  * Configurable value of packet buffer timeout.
321  */
322 uint16_t noisy_tx_sw_buf_flush_time;
323
324 /*
325  * Configurable value for size of VNF internal memory area
326  * used for simulating noisy neighbour behaviour
327  */
328 uint64_t noisy_lkup_mem_sz;
329
330 /*
331  * Configurable value of number of random writes done in
332  * VNF simulation memory area.
333  */
334 uint64_t noisy_lkup_num_writes;
335
336 /*
337  * Configurable value of number of random reads done in
338  * VNF simulation memory area.
339  */
340 uint64_t noisy_lkup_num_reads;
341
342 /*
343  * Configurable value of number of random reads/writes done in
344  * VNF simulation memory area.
345  */
346 uint64_t noisy_lkup_num_reads_writes;
347
348 /*
349  * Receive Side Scaling (RSS) configuration.
350  */
351 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
352
353 /*
354  * Port topology configuration
355  */
356 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
357
358 /*
359  * Avoid flushing all the RX streams before forwarding starts.
360  */
361 uint8_t no_flush_rx = 0; /* flush by default */
362
363 /*
364  * Flow API isolated mode.
365  */
366 uint8_t flow_isolate_all;
367
368 /*
369  * Avoid checking the link status when starting/stopping a port.
370  */
371 uint8_t no_link_check = 0; /* check by default */
372
373 /*
374  * Don't automatically start all ports in interactive mode.
375  */
376 uint8_t no_device_start = 0;
377
378 /*
379  * Enable link status change notification
380  */
381 uint8_t lsc_interrupt = 1; /* enabled by default */
382
383 /*
384  * Enable device removal notification.
385  */
386 uint8_t rmv_interrupt = 1; /* enabled by default */
387
388 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
389
390 /* After attach, port setup is called on event or by iterator */
391 bool setup_on_probe_event = true;
392
393 /* Clear ptypes on port initialization. */
394 uint8_t clear_ptypes = true;
395
396 /* Hairpin ports configuration mode. */
397 uint16_t hairpin_mode;
398
399 /* Pretty printing of ethdev events */
400 static const char * const eth_event_desc[] = {
401         [RTE_ETH_EVENT_UNKNOWN] = "unknown",
402         [RTE_ETH_EVENT_INTR_LSC] = "link state change",
403         [RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
404         [RTE_ETH_EVENT_INTR_RESET] = "reset",
405         [RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
406         [RTE_ETH_EVENT_IPSEC] = "IPsec",
407         [RTE_ETH_EVENT_MACSEC] = "MACsec",
408         [RTE_ETH_EVENT_INTR_RMV] = "device removal",
409         [RTE_ETH_EVENT_NEW] = "device probed",
410         [RTE_ETH_EVENT_DESTROY] = "device released",
411         [RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
412         [RTE_ETH_EVENT_MAX] = NULL,
413 };
414
415 /*
416  * Display or mask ether events
417  * Default to all events except VF_MBOX
418  */
419 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
420                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
421                             (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
422                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
423                             (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
424                             (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
425                             (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
426                             (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
427 /*
428  * Decide if all memory is locked for performance.
429  */
430 int do_mlockall = 0;
431
432 /*
433  * NIC bypass mode configuration options.
434  */
435
436 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
437 /* The NIC bypass watchdog timeout. */
438 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
439 #endif
440
441
442 #ifdef RTE_LIB_LATENCYSTATS
443
444 /*
445  * Set when latency stats are enabled on the command line.
446  */
447 uint8_t latencystats_enabled;
448
449 /*
450  * Lcore ID to serve latency statistics.
451  */
452 lcoreid_t latencystats_lcore_id = -1;
453
454 #endif
455
456 /*
457  * Ethernet device configuration.
458  */
459 struct rte_eth_rxmode rx_mode;
460
461 struct rte_eth_txmode tx_mode = {
462         .offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
463 };
464
465 struct rte_fdir_conf fdir_conf = {
466         .mode = RTE_FDIR_MODE_NONE,
467         .pballoc = RTE_FDIR_PBALLOC_64K,
468         .status = RTE_FDIR_REPORT_STATUS,
469         .mask = {
470                 .vlan_tci_mask = 0xFFEF,
471                 .ipv4_mask     = {
472                         .src_ip = 0xFFFFFFFF,
473                         .dst_ip = 0xFFFFFFFF,
474                 },
475                 .ipv6_mask     = {
476                         .src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
477                         .dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
478                 },
479                 .src_port_mask = 0xFFFF,
480                 .dst_port_mask = 0xFFFF,
481                 .mac_addr_byte_mask = 0xFF,
482                 .tunnel_type_mask = 1,
483                 .tunnel_id_mask = 0xFFFFFFFF,
484         },
485         .drop_queue = 127,
486 };
487
488 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
489
490 /*
491  * Display zero values by default for xstats
492  */
493 uint8_t xstats_hide_zero;
494
495 /*
496  * Measurement of CPU cycles is disabled by default.
497  */
498 uint8_t record_core_cycles;
499
500 /*
501  * Display of RX and TX bursts disabled by default
502  */
503 uint8_t record_burst_stats;
504
505 unsigned int num_sockets = 0;
506 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
507
508 #ifdef RTE_LIB_BITRATESTATS
509 /* Bitrate statistics */
510 struct rte_stats_bitrates *bitrate_data;
511 lcoreid_t bitrate_lcore_id;
512 uint8_t bitrate_enabled;
513 #endif
514
515 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
516 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
517
518 /*
519  * Hexadecimal bitmask of the RX mq modes that can be enabled.
520  */
521 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
522
523 /*
524  * Used to set forced link speed
525  */
526 uint32_t eth_link_speed;
527
528 /*
529  * ID of the current process in multi-process, used to
530  * configure the queues to be polled.
531  */
532 int proc_id;
533
534 /*
535  * Number of processes in multi-process, used to
536  * configure the queues to be polled.
537  */
538 unsigned int num_procs = 1;
539
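/*
 * Negotiate delivery of Rx metadata (flow FLAG, flow MARK and tunnel ID)
 * with the PMD. Only the primary process negotiates; features the PMD
 * cannot deliver are logged, and any error other than -ENOTSUP is fatal.
 */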
540 static void
541 eth_rx_metadata_negotiate_mp(uint16_t port_id)
542 {
543         uint64_t rx_meta_features = 0;
544         int ret;
545
546         if (!is_proc_primary())
547                 return;
548
549         rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
550         rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
551         rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
552
553         ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
554         if (ret == 0) {
555                 if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
556                         TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
557                                     port_id);
558                 }
559
560                 if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
561                         TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
562                                     port_id);
563                 }
564
565                 if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
566                         TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
567                                     port_id);
568                 }
569         } else if (ret != -ENOTSUP) {
570                 rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
571                          port_id, rte_strerror(-ret));
572         }
573 }
574
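/*
 * Pick the port that serves as a proxy for managing transfer flow rules.
 * Defaults to the port itself; only the primary process queries the PMD,
 * and a failure is reported but ignored.
 */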
575 static void
576 flow_pick_transfer_proxy_mp(uint16_t port_id)
577 {
578         struct rte_port *port = &ports[port_id];
579         int ret;
580
581         port->flow_transfer_proxy = port_id;
582
583         if (!is_proc_primary())
584                 return;
585
586         ret = rte_flow_pick_transfer_proxy(port_id, &port->flow_transfer_proxy,
587                                            NULL);
588         if (ret != 0) {
589                 fprintf(stderr, "Error picking flow transfer proxy for port %u: %s - ignore\n",
590                         port_id, rte_strerror(-ret));
591         }
592 }
593
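/*
 * Multi-process wrappers: the following helpers issue the underlying
 * ethdev/mempool call only from the primary process; secondary processes
 * return success without touching the device.
 */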
594 static int
595 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
596                       const struct rte_eth_conf *dev_conf)
597 {
598         if (is_proc_primary())
599                 return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
600                                         dev_conf);
601         return 0;
602 }
603
604 static int
605 eth_dev_start_mp(uint16_t port_id)
606 {
607         if (is_proc_primary())
608                 return rte_eth_dev_start(port_id);
609
610         return 0;
611 }
612
613 static int
614 eth_dev_stop_mp(uint16_t port_id)
615 {
616         if (is_proc_primary())
617                 return rte_eth_dev_stop(port_id);
618
619         return 0;
620 }
621
622 static void
623 mempool_free_mp(struct rte_mempool *mp)
624 {
625         if (is_proc_primary())
626                 rte_mempool_free(mp);
627 }
628
629 static int
630 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
631 {
632         if (is_proc_primary())
633                 return rte_eth_dev_set_mtu(port_id, mtu);
634
635         return 0;
636 }
637
638 /* Forward function declarations */
639 static void setup_attached_port(portid_t pi);
640 static void check_all_ports_link_status(uint32_t port_mask);
641 static int eth_event_callback(portid_t port_id,
642                               enum rte_eth_event_type type,
643                               void *param, void *ret_param);
644 static void dev_event_callback(const char *device_name,
645                                 enum rte_dev_event_type type,
646                                 void *param);
647 static void fill_xstats_display_info(void);
648
649 /*
650  * Check if all the ports are started.
651  * If yes, return positive value. If not, return zero.
652  */
653 static int all_ports_started(void);
654
655 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
656 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
657
658 /* Holds the registered mbuf dynamic flags names. */
659 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
660
661
662 /*
663  * Helper function to check whether a socket has already been discovered.
664  * If the socket is new, return a positive value; if already discovered, return zero.
665  */
666 int
667 new_socket_id(unsigned int socket_id)
668 {
669         unsigned int i;
670
671         for (i = 0; i < num_sockets; i++) {
672                 if (socket_ids[i] == socket_id)
673                         return 0;
674         }
675         return 1;
676 }
677
678 /*
679  * Setup default configuration.
680  */
681 static void
682 set_default_fwd_lcores_config(void)
683 {
684         unsigned int i;
685         unsigned int nb_lc;
686         unsigned int sock_num;
687
688         nb_lc = 0;
689         for (i = 0; i < RTE_MAX_LCORE; i++) {
690                 if (!rte_lcore_is_enabled(i))
691                         continue;
692                 sock_num = rte_lcore_to_socket_id(i);
693                 if (new_socket_id(sock_num)) {
694                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
695                                 rte_exit(EXIT_FAILURE,
696                                          "Total sockets greater than %u\n",
697                                          RTE_MAX_NUMA_NODES);
698                         }
699                         socket_ids[num_sockets++] = sock_num;
700                 }
701                 if (i == rte_get_main_lcore())
702                         continue;
703                 fwd_lcores_cpuids[nb_lc++] = i;
704         }
705         nb_lcores = (lcoreid_t) nb_lc;
706         nb_cfg_lcores = nb_lcores;
707         nb_fwd_lcores = 1;
708 }
709
710 static void
711 set_def_peer_eth_addrs(void)
712 {
713         portid_t i;
714
715         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
716                 peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
717                 peer_eth_addrs[i].addr_bytes[5] = i;
718         }
719 }
720
721 static void
722 set_default_fwd_ports_config(void)
723 {
724         portid_t pt_id;
725         int i = 0;
726
727         RTE_ETH_FOREACH_DEV(pt_id) {
728                 fwd_ports_ids[i++] = pt_id;
729
730                 /* Update sockets info according to the attached device */
731                 int socket_id = rte_eth_dev_socket_id(pt_id);
732                 if (socket_id >= 0 && new_socket_id(socket_id)) {
733                         if (num_sockets >= RTE_MAX_NUMA_NODES) {
734                                 rte_exit(EXIT_FAILURE,
735                                          "Total sockets greater than %u\n",
736                                          RTE_MAX_NUMA_NODES);
737                         }
738                         socket_ids[num_sockets++] = socket_id;
739                 }
740         }
741
742         nb_cfg_ports = nb_ports;
743         nb_fwd_ports = nb_ports;
744 }
745
746 void
747 set_def_fwd_config(void)
748 {
749         set_default_fwd_lcores_config();
750         set_def_peer_eth_addrs();
751         set_default_fwd_ports_config();
752 }
753
754 #ifndef RTE_EXEC_ENV_WINDOWS
755 /* extremely pessimistic estimation of memory required to create a mempool */
756 static int
757 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
758 {
759         unsigned int n_pages, mbuf_per_pg, leftover;
760         uint64_t total_mem, mbuf_mem, obj_sz;
761
762         /* there is no good way to predict how much space the mempool will
763          * occupy because it will allocate chunks on the fly, and some of those
764          * will come from default DPDK memory while some will come from our
765          * external memory, so just assume 128MB will be enough for everyone.
766          */
767         uint64_t hdr_mem = 128 << 20;
768
769         /* account for possible non-contiguousness */
770         obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
771         if (obj_sz > pgsz) {
772                 TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
773                 return -1;
774         }
775
776         mbuf_per_pg = pgsz / obj_sz;
777         leftover = (nb_mbufs % mbuf_per_pg) > 0;
778         n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
779
780         mbuf_mem = n_pages * pgsz;
781
782         total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
783
784         if (total_mem > SIZE_MAX) {
785                 TESTPMD_LOG(ERR, "Memory size too big\n");
786                 return -1;
787         }
788         *out = (size_t)total_mem;
789
790         return 0;
791 }
792
793 static int
794 pagesz_flags(uint64_t page_sz)
795 {
796         /* as per the mmap() manpage, a huge page size is passed as its
797          * log2 shifted by MAP_HUGE_SHIFT
798          */
799         int log2 = rte_log2_u64(page_sz);
800
801         return (log2 << HUGE_SHIFT);
802 }
803
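/*
 * Map an anonymous private area of the requested size, optionally backed
 * by hugepages of the given size. Returns NULL on failure so that the
 * caller may retry with another page size.
 */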
804 static void *
805 alloc_mem(size_t memsz, size_t pgsz, bool huge)
806 {
807         void *addr;
808         int flags;
809
810         /* allocate anonymous hugepages */
811         flags = MAP_ANONYMOUS | MAP_PRIVATE;
812         if (huge)
813                 flags |= HUGE_FLAG | pagesz_flags(pgsz);
814
815         addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
816         if (addr == MAP_FAILED)
817                 return NULL;
818
819         return addr;
820 }
821
822 struct extmem_param {
823         void *addr;
824         size_t len;
825         size_t pgsz;
826         rte_iova_t *iova_table;
827         unsigned int iova_table_len;
828 };
829
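/*
 * Walk the supported page sizes until an anonymous area big enough for
 * the mempool can be mapped, then record the IOVA address of every page
 * in that area for later registration with DPDK.
 */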
830 static int
831 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
832                 bool huge)
833 {
834         uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
835                         RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
836         unsigned int cur_page, n_pages, pgsz_idx;
837         size_t mem_sz, cur_pgsz;
838         rte_iova_t *iovas = NULL;
839         void *addr;
840         int ret;
841
842         for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
843                 /* skip anything that is too big */
844                 if (pgsizes[pgsz_idx] > SIZE_MAX)
845                         continue;
846
847                 cur_pgsz = pgsizes[pgsz_idx];
848
849                 /* if we were told not to allocate hugepages, override */
850                 if (!huge)
851                         cur_pgsz = sysconf(_SC_PAGESIZE);
852
853                 ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
854                 if (ret < 0) {
855                         TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
856                         return -1;
857                 }
858
859                 /* allocate our memory */
860                 addr = alloc_mem(mem_sz, cur_pgsz, huge);
861
862                 /* if we couldn't allocate memory with a specified page size,
863                  * that doesn't mean we can't do it with other page sizes, so
864                  * try another one.
865                  */
866                 if (addr == NULL)
867                         continue;
868
869                 /* store IOVA addresses for every page in this memory area */
870                 n_pages = mem_sz / cur_pgsz;
871
872                 iovas = malloc(sizeof(*iovas) * n_pages);
873
874                 if (iovas == NULL) {
875                         TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
876                         goto fail;
877                 }
878                 /* lock memory if it's not huge pages */
879                 if (!huge)
880                         mlock(addr, mem_sz);
881
882                 /* populate IOVA addresses */
883                 for (cur_page = 0; cur_page < n_pages; cur_page++) {
884                         rte_iova_t iova;
885                         size_t offset;
886                         void *cur;
887
888                         offset = cur_pgsz * cur_page;
889                         cur = RTE_PTR_ADD(addr, offset);
890
891                         /* touch the page before getting its IOVA */
892                         *(volatile char *)cur = 0;
893
894                         iova = rte_mem_virt2iova(cur);
895
896                         iovas[cur_page] = iova;
897                 }
898
899                 break;
900         }
901         /* if we couldn't allocate anything */
902         if (iovas == NULL)
903                 return -1;
904
905         param->addr = addr;
906         param->len = mem_sz;
907         param->pgsz = cur_pgsz;
908         param->iova_table = iovas;
909         param->iova_table_len = n_pages;
910
911         return 0;
912 fail:
913         if (iovas)
914                 free(iovas);
915         if (addr)
916                 munmap(addr, mem_sz);
917
918         return -1;
919 }
920
921 static int
922 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
923 {
924         struct extmem_param param;
925         int socket_id, ret;
926
927         memset(&param, 0, sizeof(param));
928
929         /* check if our heap exists */
930         socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
931         if (socket_id < 0) {
932                 /* create our heap */
933                 ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
934                 if (ret < 0) {
935                         TESTPMD_LOG(ERR, "Cannot create heap\n");
936                         return -1;
937                 }
938         }
939
940         ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
941         if (ret < 0) {
942                 TESTPMD_LOG(ERR, "Cannot create memory area\n");
943                 return -1;
944         }
945
946         /* we now have a valid memory area, so add it to heap */
947         ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
948                         param.addr, param.len, param.iova_table,
949                         param.iova_table_len, param.pgsz);
950
951         /* when using VFIO, memory is automatically mapped for DMA by EAL */
952
953         /* not needed any more */
954         free(param.iova_table);
955
956         if (ret < 0) {
957                 TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
958                 munmap(param.addr, param.len);
959                 return -1;
960         }
961
962         /* success */
963
964         TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
965                         param.len >> 20);
966
967         return 0;
968 }
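/*
 * Mempool memory-chunk iterator callback: DMA-unmap the chunk from every
 * probed port and un-register it as DPDK external memory.
 */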
969 static void
970 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
971              struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
972 {
973         uint16_t pid = 0;
974         int ret;
975
976         RTE_ETH_FOREACH_DEV(pid) {
977                 struct rte_eth_dev_info dev_info;
978
979                 ret = eth_dev_info_get_print_err(pid, &dev_info);
980                 if (ret != 0) {
981                         TESTPMD_LOG(DEBUG,
982                                     "unable to get device info for port %d on addr 0x%p, "
983                                     "mempool unmapping will not be performed\n",
984                                     pid, memhdr->addr);
985                         continue;
986                 }
987
988                 ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
989                 if (ret) {
990                         TESTPMD_LOG(DEBUG,
991                                     "unable to DMA unmap addr 0x%p "
992                                     "for device %s\n",
993                                     memhdr->addr, dev_info.device->name);
994                 }
995         }
996         ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
997         if (ret) {
998                 TESTPMD_LOG(DEBUG,
999                             "unable to un-register addr 0x%p\n", memhdr->addr);
1000         }
1001 }
1002
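/*
 * Mempool memory-chunk iterator callback: register the chunk as DPDK
 * external memory and DMA-map it for every probed port.
 */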
1003 static void
1004 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1005            struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1006 {
1007         uint16_t pid = 0;
1008         size_t page_size = sysconf(_SC_PAGESIZE);
1009         int ret;
1010
1011         ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1012                                   page_size);
1013         if (ret) {
1014                 TESTPMD_LOG(DEBUG,
1015                             "unable to register addr 0x%p\n", memhdr->addr);
1016                 return;
1017         }
1018         RTE_ETH_FOREACH_DEV(pid) {
1019                 struct rte_eth_dev_info dev_info;
1020
1021                 ret = eth_dev_info_get_print_err(pid, &dev_info);
1022                 if (ret != 0) {
1023                         TESTPMD_LOG(DEBUG,
1024                                     "unable to get device info for port %d on addr 0x%p, "
1025                                     "mempool mapping will not be performed\n",
1026                                     pid, memhdr->addr);
1027                         continue;
1028                 }
1029                 ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1030                 if (ret) {
1031                         TESTPMD_LOG(DEBUG,
1032                                     "unable to DMA map addr 0x%p "
1033                                     "for device %s\n",
1034                                     memhdr->addr, dev_info.device->name);
1035                 }
1036         }
1037 }
1038 #endif
1039
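/*
 * Reserve IOVA-contiguous memzones for pinned external buffers and fill
 * the descriptor array consumed by rte_pktmbuf_pool_create_extbuf().
 * Returns the number of descriptors, or 0 on failure with errno set.
 */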
1040 static unsigned int
1041 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1042             char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1043 {
1044         struct rte_pktmbuf_extmem *xmem;
1045         unsigned int ext_num, zone_num, elt_num;
1046         uint16_t elt_size;
1047
1048         elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1049         elt_num = EXTBUF_ZONE_SIZE / elt_size;
1050         zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1051
1052         xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1053         if (xmem == NULL) {
1054                 TESTPMD_LOG(ERR, "Cannot allocate memory for "
1055                                  "external buffer descriptors\n");
1056                 *ext_mem = NULL;
1057                 return 0;
1058         }
1059         for (ext_num = 0; ext_num < zone_num; ext_num++) {
1060                 struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1061                 const struct rte_memzone *mz;
1062                 char mz_name[RTE_MEMZONE_NAMESIZE];
1063                 int ret;
1064
1065                 ret = snprintf(mz_name, sizeof(mz_name),
1066                         RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1067                 if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1068                         errno = ENAMETOOLONG;
1069                         ext_num = 0;
1070                         break;
1071                 }
1072                 mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
1073                                                  socket_id,
1074                                                  RTE_MEMZONE_IOVA_CONTIG |
1075                                                  RTE_MEMZONE_1GB |
1076                                                  RTE_MEMZONE_SIZE_HINT_ONLY,
1077                                                  EXTBUF_ZONE_SIZE);
1078                 if (mz == NULL) {
1079                         /*
1080                          * The caller exits on external buffer creation
1081                          * error, so there is no need to free memzones.
1082                          */
1083                         errno = ENOMEM;
1084                         ext_num = 0;
1085                         break;
1086                 }
1087                 xseg->buf_ptr = mz->addr;
1088                 xseg->buf_iova = mz->iova;
1089                 xseg->buf_len = EXTBUF_ZONE_SIZE;
1090                 xseg->elt_size = elt_size;
1091         }
1092         if (ext_num == 0 && xmem != NULL) {
1093                 free(xmem);
1094                 xmem = NULL;
1095         }
1096         *ext_mem = xmem;
1097         return ext_num;
1098 }
1099
1100 /*
1101  * Creation of an mbuf pool for a given socket, done once at init time.
1102  */
1103 static struct rte_mempool *
1104 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1105                  unsigned int socket_id, uint16_t size_idx)
1106 {
1107         char pool_name[RTE_MEMPOOL_NAMESIZE];
1108         struct rte_mempool *rte_mp = NULL;
1109 #ifndef RTE_EXEC_ENV_WINDOWS
1110         uint32_t mb_size;
1111
1112         mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1113 #endif
1114         mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1115         if (!is_proc_primary()) {
1116                 rte_mp = rte_mempool_lookup(pool_name);
1117                 if (rte_mp == NULL)
1118                         rte_exit(EXIT_FAILURE,
1119                                 "Get mbuf pool for socket %u failed: %s\n",
1120                                 socket_id, rte_strerror(rte_errno));
1121                 return rte_mp;
1122         }
1123
1124         TESTPMD_LOG(INFO,
1125                 "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1126                 pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1127
1128         switch (mp_alloc_type) {
1129         case MP_ALLOC_NATIVE:
1130                 {
1131                         /* wrapper to rte_mempool_create() */
1132                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1133                                         rte_mbuf_best_mempool_ops());
1134                         rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1135                                 mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1136                         break;
1137                 }
1138 #ifndef RTE_EXEC_ENV_WINDOWS
1139         case MP_ALLOC_ANON:
1140                 {
1141                         rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1142                                 mb_size, (unsigned int) mb_mempool_cache,
1143                                 sizeof(struct rte_pktmbuf_pool_private),
1144                                 socket_id, mempool_flags);
1145                         if (rte_mp == NULL)
1146                                 goto err;
1147
1148                         if (rte_mempool_populate_anon(rte_mp) == 0) {
1149                                 rte_mempool_free(rte_mp);
1150                                 rte_mp = NULL;
1151                                 goto err;
1152                         }
1153                         rte_pktmbuf_pool_init(rte_mp, NULL);
1154                         rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1155                         rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1156                         break;
1157                 }
1158         case MP_ALLOC_XMEM:
1159         case MP_ALLOC_XMEM_HUGE:
1160                 {
1161                         int heap_socket;
1162                         bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1163
1164                         if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1165                                 rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1166
1167                         heap_socket =
1168                                 rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1169                         if (heap_socket < 0)
1170                                 rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1171
1172                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1173                                         rte_mbuf_best_mempool_ops());
1174                         rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1175                                         mb_mempool_cache, 0, mbuf_seg_size,
1176                                         heap_socket);
1177                         break;
1178                 }
1179 #endif
1180         case MP_ALLOC_XBUF:
1181                 {
1182                         struct rte_pktmbuf_extmem *ext_mem;
1183                         unsigned int ext_num;
1184
1185                         ext_num = setup_extbuf(nb_mbuf, mbuf_seg_size,
1186                                                socket_id, pool_name, &ext_mem);
1187                         if (ext_num == 0)
1188                                 rte_exit(EXIT_FAILURE,
1189                                          "Can't create pinned data buffers\n");
1190
1191                         TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1192                                         rte_mbuf_best_mempool_ops());
1193                         rte_mp = rte_pktmbuf_pool_create_extbuf
1194                                         (pool_name, nb_mbuf, mb_mempool_cache,
1195                                          0, mbuf_seg_size, socket_id,
1196                                          ext_mem, ext_num);
1197                         free(ext_mem);
1198                         break;
1199                 }
1200         default:
1201                 {
1202                         rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1203                 }
1204         }
1205
1206 #ifndef RTE_EXEC_ENV_WINDOWS
1207 err:
1208 #endif
1209         if (rte_mp == NULL) {
1210                 rte_exit(EXIT_FAILURE,
1211                         "Creation of mbuf pool for socket %u failed: %s\n",
1212                         socket_id, rte_strerror(rte_errno));
1213         } else if (verbose_level > 0) {
1214                 rte_mempool_dump(stdout, rte_mp);
1215         }
1216         return rte_mp;
1217 }
1218
1219 /*
1220  * Check whether the given socket ID is valid in NUMA mode;
1221  * if valid, return 0, else return -1.
1222  */
1223 static int
1224 check_socket_id(const unsigned int socket_id)
1225 {
1226         static int warning_once = 0;
1227
1228         if (new_socket_id(socket_id)) {
1229                 if (!warning_once && numa_support)
1230                         fprintf(stderr,
1231                                 "Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1232                 warning_once = 1;
1233                 return -1;
1234         }
1235         return 0;
1236 }
1237
1238 /*
1239  * Get the allowed maximum number of RX queues.
1240  * *pid returns the port ID that has the minimal value of
1241  * max_rx_queues among all ports.
1242  */
1243 queueid_t
1244 get_allowed_max_nb_rxq(portid_t *pid)
1245 {
1246         queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1247         bool max_rxq_valid = false;
1248         portid_t pi;
1249         struct rte_eth_dev_info dev_info;
1250
1251         RTE_ETH_FOREACH_DEV(pi) {
1252                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1253                         continue;
1254
1255                 max_rxq_valid = true;
1256                 if (dev_info.max_rx_queues < allowed_max_rxq) {
1257                         allowed_max_rxq = dev_info.max_rx_queues;
1258                         *pid = pi;
1259                 }
1260         }
1261         return max_rxq_valid ? allowed_max_rxq : 0;
1262 }
1263
1264 /*
1265  * Check whether the input rxq is valid.
1266  * If it is not greater than the maximum number of RX queues
1267  * of any port, it is valid.
1268  * If valid, return 0, else return -1.
1269  */
1270 int
1271 check_nb_rxq(queueid_t rxq)
1272 {
1273         queueid_t allowed_max_rxq;
1274         portid_t pid = 0;
1275
1276         allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1277         if (rxq > allowed_max_rxq) {
1278                 fprintf(stderr,
1279                         "Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1280                         rxq, allowed_max_rxq, pid);
1281                 return -1;
1282         }
1283         return 0;
1284 }
1285
1286 /*
1287  * Get the allowed maximum number of TX queues.
1288  * *pid returns the port ID that has the minimal value of
1289  * max_tx_queues among all ports.
1290  */
1291 queueid_t
1292 get_allowed_max_nb_txq(portid_t *pid)
1293 {
1294         queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1295         bool max_txq_valid = false;
1296         portid_t pi;
1297         struct rte_eth_dev_info dev_info;
1298
1299         RTE_ETH_FOREACH_DEV(pi) {
1300                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1301                         continue;
1302
1303                 max_txq_valid = true;
1304                 if (dev_info.max_tx_queues < allowed_max_txq) {
1305                         allowed_max_txq = dev_info.max_tx_queues;
1306                         *pid = pi;
1307                 }
1308         }
1309         return max_txq_valid ? allowed_max_txq : 0;
1310 }
1311
1312 /*
1313  * Check whether the input txq is valid.
1314  * If it is not greater than the maximum number of TX queues
1315  * of any port, it is valid.
1316  * If valid, return 0, else return -1.
1317  */
1318 int
1319 check_nb_txq(queueid_t txq)
1320 {
1321         queueid_t allowed_max_txq;
1322         portid_t pid = 0;
1323
1324         allowed_max_txq = get_allowed_max_nb_txq(&pid);
1325         if (txq > allowed_max_txq) {
1326                 fprintf(stderr,
1327                         "Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1328                         txq, allowed_max_txq, pid);
1329                 return -1;
1330         }
1331         return 0;
1332 }
1333
1334 /*
1335  * Get the allowed maximum number of RXDs of every rx queue.
1336  * *pid returns the port ID that has the minimal value of
1337  * max_rxd among all queues of all ports.
1338  */
1339 static uint16_t
1340 get_allowed_max_nb_rxd(portid_t *pid)
1341 {
1342         uint16_t allowed_max_rxd = UINT16_MAX;
1343         portid_t pi;
1344         struct rte_eth_dev_info dev_info;
1345
1346         RTE_ETH_FOREACH_DEV(pi) {
1347                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1348                         continue;
1349
1350                 if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1351                         allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1352                         *pid = pi;
1353                 }
1354         }
1355         return allowed_max_rxd;
1356 }
1357
1358 /*
1359  * Get the allowed minimal number of RXDs of every rx queue.
1360  * *pid returns the port ID that has the maximal value of
1361  * min_rxd among all queues of all ports.
1362  */
1363 static uint16_t
1364 get_allowed_min_nb_rxd(portid_t *pid)
1365 {
1366         uint16_t allowed_min_rxd = 0;
1367         portid_t pi;
1368         struct rte_eth_dev_info dev_info;
1369
1370         RTE_ETH_FOREACH_DEV(pi) {
1371                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1372                         continue;
1373
1374                 if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1375                         allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1376                         *pid = pi;
1377                 }
1378         }
1379
1380         return allowed_min_rxd;
1381 }
1382
1383 /*
1384  * Check whether the input rxd is valid.
1385  * If it is not greater than the maximum number of RXDs of any
1386  * RX queue and not less than the minimal number of RXDs of any
1387  * RX queue, it is valid.
1388  * If valid, return 0, else return -1.
1389  */
1390 int
1391 check_nb_rxd(queueid_t rxd)
1392 {
1393         uint16_t allowed_max_rxd;
1394         uint16_t allowed_min_rxd;
1395         portid_t pid = 0;
1396
1397         allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1398         if (rxd > allowed_max_rxd) {
1399                 fprintf(stderr,
1400                         "Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1401                         rxd, allowed_max_rxd, pid);
1402                 return -1;
1403         }
1404
1405         allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1406         if (rxd < allowed_min_rxd) {
1407                 fprintf(stderr,
1408                         "Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1409                         rxd, allowed_min_rxd, pid);
1410                 return -1;
1411         }
1412
1413         return 0;
1414 }
1415
1416 /*
1417  * Get the allowed maximum number of TXDs of every TX queue.
1418  * *pid returns the port ID that has the minimal value of
1419  * max_txd among all queues of all ports.
1420  */
1421 static uint16_t
1422 get_allowed_max_nb_txd(portid_t *pid)
1423 {
1424         uint16_t allowed_max_txd = UINT16_MAX;
1425         portid_t pi;
1426         struct rte_eth_dev_info dev_info;
1427
1428         RTE_ETH_FOREACH_DEV(pi) {
1429                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1430                         continue;
1431
1432                 if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1433                         allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1434                         *pid = pi;
1435                 }
1436         }
1437         return allowed_max_txd;
1438 }
1439
1440 /*
1441  * Get the allowed minimal number of TXDs of every TX queue.
1442  * *pid returns the port ID that has the maximal value of
1443  * min_txd among all queues of all ports.
1444  */
1445 static uint16_t
1446 get_allowed_min_nb_txd(portid_t *pid)
1447 {
1448         uint16_t allowed_min_txd = 0;
1449         portid_t pi;
1450         struct rte_eth_dev_info dev_info;
1451
1452         RTE_ETH_FOREACH_DEV(pi) {
1453                 if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1454                         continue;
1455
1456                 if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1457                         allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1458                         *pid = pi;
1459                 }
1460         }
1461
1462         return allowed_min_txd;
1463 }
1464
1465 /*
1466  * Check whether the input txd is valid.
1467  * If it is not greater than the maximum number of TXDs of any
1468  * TX queue and not less than the minimal number, it is valid.
1469  * If valid, return 0, else return -1.
1470  */
1471 int
1472 check_nb_txd(queueid_t txd)
1473 {
1474         uint16_t allowed_max_txd;
1475         uint16_t allowed_min_txd;
1476         portid_t pid = 0;
1477
1478         allowed_max_txd = get_allowed_max_nb_txd(&pid);
1479         if (txd > allowed_max_txd) {
1480                 fprintf(stderr,
1481                         "Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1482                         txd, allowed_max_txd, pid);
1483                 return -1;
1484         }
1485
1486         allowed_min_txd = get_allowed_min_nb_txd(&pid);
1487         if (txd < allowed_min_txd) {
1488                 fprintf(stderr,
1489                         "Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1490                         txd, allowed_min_txd, pid);
1491                 return -1;
1492         }
1493         return 0;
1494 }
1495
1496
1497 /*
1498  * Get the allowed maximum number of hairpin queues.
1499  * *pid returns the port ID that has the minimal value of
1500  * max_hairpin_queues among all ports.
1501  */
1502 queueid_t
1503 get_allowed_max_nb_hairpinq(portid_t *pid)
1504 {
1505         queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1506         portid_t pi;
1507         struct rte_eth_hairpin_cap cap;
1508
1509         RTE_ETH_FOREACH_DEV(pi) {
1510                 if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1511                         *pid = pi;
1512                         return 0;
1513                 }
1514                 if (cap.max_nb_queues < allowed_max_hairpinq) {
1515                         allowed_max_hairpinq = cap.max_nb_queues;
1516                         *pid = pi;
1517                 }
1518         }
1519         return allowed_max_hairpinq;
1520 }
1521
1522 /*
1523  * Check whether the input hairpin queue count is valid.
1524  * If it is not greater than the maximum number of hairpin
1525  * queues of any port, it is valid.
1526  * If valid, return 0, else return -1.
1527  */
1528 int
1529 check_nb_hairpinq(queueid_t hairpinq)
1530 {
1531         queueid_t allowed_max_hairpinq;
1532         portid_t pid = 0;
1533
1534         allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1535         if (hairpinq > allowed_max_hairpinq) {
1536                 fprintf(stderr,
1537                         "Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1538                         hairpinq, allowed_max_hairpinq, pid);
1539                 return -1;
1540         }
1541         return 0;
1542 }
1543
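/*
 * Derive the L2 overhead from the gap between the maximum Rx packet length
 * and the maximum MTU reported by the driver; fall back to the standard
 * Ethernet header plus CRC length otherwise.
 */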
1544 static int
1545 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1546 {
1547         uint32_t eth_overhead;
1548
1549         if (dev_info->max_mtu != UINT16_MAX &&
1550             dev_info->max_rx_pktlen > dev_info->max_mtu)
1551                 eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1552         else
1553                 eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1554
1555         return eth_overhead;
1556 }
1557
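/*
 * Apply the default Rx/Tx configuration to one port: negotiate Rx metadata,
 * pick the flow transfer proxy, propagate the global offloads to every
 * queue, convert --max-pkt-len into an MTU, and grow the first mbuf segment
 * if the driver limits the number of segments per MTU.
 */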
1558 static void
1559 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1560 {
1561         struct rte_port *port = &ports[pid];
1562         int ret;
1563         int i;
1564
1565         eth_rx_metadata_negotiate_mp(pid);
1566         flow_pick_transfer_proxy_mp(pid);
1567
1568         port->dev_conf.txmode = tx_mode;
1569         port->dev_conf.rxmode = rx_mode;
1570
1571         ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1572         if (ret != 0)
1573                 rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1574
1575         ret = update_jumbo_frame_offload(pid, 0);
1576         if (ret != 0)
1577                 fprintf(stderr,
1578                         "Updating jumbo frame offload failed for port %u\n",
1579                         pid);
1580
1581         if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1582                 port->dev_conf.txmode.offloads &=
1583                         ~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1584
1585         /* Apply Rx offloads configuration */
1586         for (i = 0; i < port->dev_info.max_rx_queues; i++)
1587                 port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1588         /* Apply Tx offloads configuration */
1589         for (i = 0; i < port->dev_info.max_tx_queues; i++)
1590                 port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1591
1592         if (eth_link_speed)
1593                 port->dev_conf.link_speeds = eth_link_speed;
1594
1595         if (max_rx_pkt_len)
1596                 port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1597                         get_eth_overhead(&port->dev_info);
1598
1599         /* set flag to initialize port/queue */
1600         port->need_reconfig = 1;
1601         port->need_reconfig_queues = 1;
1602         port->socket_id = socket_id;
1603         port->tx_metadata = 0;
1604
1605         /*
1606          * Check for maximum number of segments per MTU.
1607          * Accordingly update the mbuf data size.
1608          */
1609         if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1610             port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1611                 uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1612                 uint16_t mtu;
1613
1614                 if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1615                         uint16_t data_size = (mtu + eth_overhead) /
1616                                 port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1617                         uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1618
1619                         if (buffer_size > mbuf_data_size[0]) {
1620                                 mbuf_data_size[0] = buffer_size;
1621                                 TESTPMD_LOG(WARNING,
1622                                         "Configured mbuf size of the first segment %hu\n",
1623                                         mbuf_data_size[0]);
1624                         }
1625                 }
1626         }
1627 }
1628
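/*
 * Global initialization: allocate the per-lcore forwarding contexts,
 * apply the default configuration to every port, create the mbuf
 * pools, record which pool and GSO context each lcore uses, build the
 * forwarding configuration and create one GRO context per lcore.
 */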
1629 static void
1630 init_config(void)
1631 {
1632         portid_t pid;
1633         struct rte_mempool *mbp;
1634         unsigned int nb_mbuf_per_pool;
1635         lcoreid_t  lc_id;
1636         struct rte_gro_param gro_param;
1637         uint32_t gso_types;
1638
1639         /* Configuration of logical cores. */
1640         fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1641                                 sizeof(struct fwd_lcore *) * nb_lcores,
1642                                 RTE_CACHE_LINE_SIZE);
1643         if (fwd_lcores == NULL) {
1644                 rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1645                                                         "failed\n", nb_lcores);
1646         }
1647         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1648                 fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1649                                                sizeof(struct fwd_lcore),
1650                                                RTE_CACHE_LINE_SIZE);
1651                 if (fwd_lcores[lc_id] == NULL) {
1652                         rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1653                                                                 "failed\n");
1654                 }
1655                 fwd_lcores[lc_id]->cpuid_idx = lc_id;
1656         }
1657
1658         RTE_ETH_FOREACH_DEV(pid) {
1659                 uint32_t socket_id;
1660
1661                 if (numa_support) {
1662                         socket_id = port_numa[pid];
1663                         if (port_numa[pid] == NUMA_NO_CONFIG) {
1664                                 socket_id = rte_eth_dev_socket_id(pid);
1665
1666                                 /*
1667                                  * if socket_id is invalid,
1668                                  * set to the first available socket.
1669                                  */
1670                                 if (check_socket_id(socket_id) < 0)
1671                                         socket_id = socket_ids[0];
1672                         }
1673                 } else {
1674                         socket_id = (socket_num == UMA_NO_CONFIG) ?
1675                                     0 : socket_num;
1676                 }
1677                 /* Apply default TxRx configuration for all ports */
1678                 init_config_port_offloads(pid, socket_id);
1679         }
1680         /*
1681          * Create the mbuf pools.
1682          * If NUMA support is disabled, create a single pool in socket 0
1683          * memory by default.
1684          * Otherwise, create a pool in the memory of each configured socket.
1685          *
1686          * Size the pools for the maximum possible nb_rxd and nb_txd, so
1687          * that both can still be reconfigured at run time.
1688          */
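        /*
         * A sketch of the default sizing below, assuming typical values
         * of RTE_TEST_RX_DESC_MAX = RTE_TEST_TX_DESC_MAX = 2048,
         * MAX_PKT_BURST = 512, a 250-entry mempool cache and 4 lcores:
         *
         *   nb_mbuf_per_pool = (2048 + 4 * 250 + 2048 + 512)
         *                    * RTE_MAX_ETHPORTS
         */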
1689         if (param_total_num_mbufs)
1690                 nb_mbuf_per_pool = param_total_num_mbufs;
1691         else {
1692                 nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1693                         (nb_lcores * mb_mempool_cache) +
1694                         RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1695                 nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1696         }
1697
1698         if (numa_support) {
1699                 uint8_t i, j;
1700
1701                 for (i = 0; i < num_sockets; i++)
1702                         for (j = 0; j < mbuf_data_size_n; j++)
1703                                 mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1704                                         mbuf_pool_create(mbuf_data_size[j],
1705                                                           nb_mbuf_per_pool,
1706                                                           socket_ids[i], j);
1707         } else {
1708                 uint8_t i;
1709
1710                 for (i = 0; i < mbuf_data_size_n; i++)
1711                         mempools[i] = mbuf_pool_create
1712                                         (mbuf_data_size[i],
1713                                          nb_mbuf_per_pool,
1714                                          socket_num == UMA_NO_CONFIG ?
1715                                          0 : socket_num, i);
1716         }
1717
1718         init_port_config();
1719
1720         gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1721                 DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1722         /*
1723          * Record which mbuf pool each logical core uses, if needed.
1724          */
1725         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1726                 mbp = mbuf_pool_find(
1727                         rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1728
1729                 if (mbp == NULL)
1730                         mbp = mbuf_pool_find(0, 0);
1731                 fwd_lcores[lc_id]->mbp = mbp;
1732                 /* initialize GSO context */
1733                 fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1734                 fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1735                 fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1736                 fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1737                         RTE_ETHER_CRC_LEN;
1738                 fwd_lcores[lc_id]->gso_ctx.flag = 0;
1739         }
1740
1741         fwd_config_setup();
1742
1743         /* create a gro context for each lcore */
1744         gro_param.gro_types = RTE_GRO_TCP_IPV4;
1745         gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1746         gro_param.max_item_per_flow = MAX_PKT_BURST;
1747         for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1748                 gro_param.socket_id = rte_lcore_to_socket_id(
1749                                 fwd_lcores_cpuids[lc_id]);
1750                 fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1751                 if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1752                         rte_exit(EXIT_FAILURE,
1753                                         "rte_gro_ctx_create() failed\n");
1754                 }
1755         }
1756 }
1757
1758
1759 void
1760 reconfig(portid_t new_port_id, unsigned int socket_id)
1761 {
1762         /* Reconfiguration of Ethernet ports. */
1763         init_config_port_offloads(new_port_id, socket_id);
1764         init_port_config();
1765 }
1766
1767
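/*
 * (Re)allocate the forwarding streams: validate the requested queue
 * counts against every port's limits, resolve each port's NUMA socket,
 * then size the stream array as nb_ports * max(nb_rxq, nb_txq),
 * freeing any previous allocation first.
 */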
1768 int
1769 init_fwd_streams(void)
1770 {
1771         portid_t pid;
1772         struct rte_port *port;
1773         streamid_t sm_id, nb_fwd_streams_new;
1774         queueid_t q;
1775
1776         /* set socket id according to numa or not */
1777         RTE_ETH_FOREACH_DEV(pid) {
1778                 port = &ports[pid];
1779                 if (nb_rxq > port->dev_info.max_rx_queues) {
1780                         fprintf(stderr,
1781                                 "Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1782                                 nb_rxq, port->dev_info.max_rx_queues);
1783                         return -1;
1784                 }
1785                 if (nb_txq > port->dev_info.max_tx_queues) {
1786                         fprintf(stderr,
1787                                 "Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1788                                 nb_txq, port->dev_info.max_tx_queues);
1789                         return -1;
1790                 }
1791                 if (numa_support) {
1792                         if (port_numa[pid] != NUMA_NO_CONFIG)
1793                                 port->socket_id = port_numa[pid];
1794                         else {
1795                                 port->socket_id = rte_eth_dev_socket_id(pid);
1796
1797                                 /*
1798                                  * if socket_id is invalid,
1799                                  * set to the first available socket.
1800                                  */
1801                                 if (check_socket_id(port->socket_id) < 0)
1802                                         port->socket_id = socket_ids[0];
1803                         }
1804                 } else {
1806                         if (socket_num == UMA_NO_CONFIG)
1807                                 port->socket_id = 0;
1808                         else
1809                                 port->socket_id = socket_num;
1810                 }
1811         }
1812
1813         q = RTE_MAX(nb_rxq, nb_txq);
1814         if (q == 0) {
1815                 fprintf(stderr,
1816                         "Fail: Cannot allocate fwd streams as number of queues is 0\n");
1817                 return -1;
1818         }
1819         nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1820         if (nb_fwd_streams_new == nb_fwd_streams)
1821                 return 0;
1822         /* clear the old */
1823         if (fwd_streams != NULL) {
1824                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1825                         if (fwd_streams[sm_id] == NULL)
1826                                 continue;
1827                         rte_free(fwd_streams[sm_id]);
1828                         fwd_streams[sm_id] = NULL;
1829                 }
1830                 rte_free(fwd_streams);
1831                 fwd_streams = NULL;
1832         }
1833
1834         /* init new */
1835         nb_fwd_streams = nb_fwd_streams_new;
1836         if (nb_fwd_streams) {
1837                 fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1838                         sizeof(struct fwd_stream *) * nb_fwd_streams,
1839                         RTE_CACHE_LINE_SIZE);
1840                 if (fwd_streams == NULL)
1841                         rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1842                                  " (struct fwd_stream *)) failed\n",
1843                                  nb_fwd_streams);
1844
1845                 for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1846                         fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1847                                 " struct fwd_stream", sizeof(struct fwd_stream),
1848                                 RTE_CACHE_LINE_SIZE);
1849                         if (fwd_streams[sm_id] == NULL)
1850                                 rte_exit(EXIT_FAILURE, "rte_zmalloc"
1851                                          "(struct fwd_stream) failed\n");
1852                 }
1853         }
1854
1855         return 0;
1856 }
1857
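/*
 * Summarize the Rx/Tx burst-size histogram: always report the
 * zero-packet bursts, then the two most frequent non-zero burst sizes,
 * folding everything else into a remainder, e.g.:
 *
 *   RX-bursts : 1420 [15% of 0 pkts + 75% of 32 pkts + 10% of other]
 */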
1858 static void
1859 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1860 {
1861         uint64_t total_burst, sburst;
1862         uint64_t nb_burst;
1863         uint64_t burst_stats[4];
1864         uint16_t pktnb_stats[4];
1865         uint16_t nb_pkt;
1866         int burst_percent[4], sburstp;
1867         int i;
1868
1869         /*
1870          * First compute the total number of packet bursts and find the
1871          * two most frequent non-zero burst sizes.
1872          */
1873         memset(&burst_stats, 0x0, sizeof(burst_stats));
1874         memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1875
1876         /* Always show stats for burst size 0 */
1877         total_burst = pbs->pkt_burst_spread[0];
1878         burst_stats[0] = pbs->pkt_burst_spread[0];
1879         pktnb_stats[0] = 0;
1880
1881         /* Find the next 2 burst sizes with highest occurrences. */
1882         for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1883                 nb_burst = pbs->pkt_burst_spread[nb_pkt];
1884
1885                 if (nb_burst == 0)
1886                         continue;
1887
1888                 total_burst += nb_burst;
1889
1890                 if (nb_burst > burst_stats[1]) {
1891                         burst_stats[2] = burst_stats[1];
1892                         pktnb_stats[2] = pktnb_stats[1];
1893                         burst_stats[1] = nb_burst;
1894                         pktnb_stats[1] = nb_pkt;
1895                 } else if (nb_burst > burst_stats[2]) {
1896                         burst_stats[2] = nb_burst;
1897                         pktnb_stats[2] = nb_pkt;
1898                 }
1899         }
1900         if (total_burst == 0)
1901                 return;
1902
1903         printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1904         for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1905                 if (i == 3) {
1906                         printf("%d%% of other]\n", 100 - sburstp);
1907                         return;
1908                 }
1909
1910                 sburst += burst_stats[i];
1911                 if (sburst == total_burst) {
1912                         printf("%d%% of %d pkts]\n",
1913                                 100 - sburstp, (int) pktnb_stats[i]);
1914                         return;
1915                 }
1916
1917                 burst_percent[i] =
1918                         (double)burst_stats[i] / total_burst * 100;
1919                 printf("%d%% of %d pkts + ",
1920                         burst_percent[i], (int) pktnb_stats[i]);
1921                 sburstp += burst_percent[i];
1922         }
1923 }
1924
1925 static void
1926 fwd_stream_stats_display(streamid_t stream_id)
1927 {
1928         struct fwd_stream *fs;
1929         static const char *fwd_top_stats_border = "-------";
1930
1931         fs = fwd_streams[stream_id];
1932         if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1933             (fs->fwd_dropped == 0))
1934                 return;
1935         printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1936                "TX Port=%2d/Queue=%2d %s\n",
1937                fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1938                fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1939         printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1940                " TX-dropped: %-14"PRIu64,
1941                fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1942
1943         /* if checksum mode */
1944         if (cur_fwd_eng == &csum_fwd_engine) {
1945                 printf("  RX- bad IP checksum: %-14"PRIu64
1946                        "  RX- bad L4 checksum: %-14"PRIu64
1947                        " RX- bad outer L4 checksum: %-14"PRIu64"\n",
1948                         fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1949                         fs->rx_bad_outer_l4_csum);
1950                 printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1951                         fs->rx_bad_outer_ip_csum);
1952         } else {
1953                 printf("\n");
1954         }
1955
1956         if (record_burst_stats) {
1957                 pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1958                 pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1959         }
1960 }
1961
1962 void
1963 fwd_stats_display(void)
1964 {
1965         static const char *fwd_stats_border = "----------------------";
1966         static const char *acc_stats_border = "+++++++++++++++";
1967         struct {
1968                 struct fwd_stream *rx_stream;
1969                 struct fwd_stream *tx_stream;
1970                 uint64_t tx_dropped;
1971                 uint64_t rx_bad_ip_csum;
1972                 uint64_t rx_bad_l4_csum;
1973                 uint64_t rx_bad_outer_l4_csum;
1974                 uint64_t rx_bad_outer_ip_csum;
1975         } ports_stats[RTE_MAX_ETHPORTS];
1976         uint64_t total_rx_dropped = 0;
1977         uint64_t total_tx_dropped = 0;
1978         uint64_t total_rx_nombuf = 0;
1979         struct rte_eth_stats stats;
1980         uint64_t fwd_cycles = 0;
1981         uint64_t total_recv = 0;
1982         uint64_t total_xmit = 0;
1983         struct rte_port *port;
1984         streamid_t sm_id;
1985         portid_t pt_id;
1986         int i;
1987
1988         memset(ports_stats, 0, sizeof(ports_stats));
1989
1990         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1991                 struct fwd_stream *fs = fwd_streams[sm_id];
1992
1993                 if (cur_fwd_config.nb_fwd_streams >
1994                     cur_fwd_config.nb_fwd_ports) {
1995                         fwd_stream_stats_display(sm_id);
1996                 } else {
1997                         ports_stats[fs->tx_port].tx_stream = fs;
1998                         ports_stats[fs->rx_port].rx_stream = fs;
1999                 }
2000
2001                 ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2002
2003                 ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2004                 ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2005                 ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2006                                 fs->rx_bad_outer_l4_csum;
2007                 ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2008                                 fs->rx_bad_outer_ip_csum;
2009
2010                 if (record_core_cycles)
2011                         fwd_cycles += fs->core_cycles;
2012         }
2013         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2014                 pt_id = fwd_ports_ids[i];
2015                 port = &ports[pt_id];
2016
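                /*
                 * rte_eth_stats_get() returns totals accumulated since
                 * device start; subtracting the snapshot taken in
                 * fwd_stats_reset() turns them into per-run deltas.
                 */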
2017                 rte_eth_stats_get(pt_id, &stats);
2018                 stats.ipackets -= port->stats.ipackets;
2019                 stats.opackets -= port->stats.opackets;
2020                 stats.ibytes -= port->stats.ibytes;
2021                 stats.obytes -= port->stats.obytes;
2022                 stats.imissed -= port->stats.imissed;
2023                 stats.oerrors -= port->stats.oerrors;
2024                 stats.rx_nombuf -= port->stats.rx_nombuf;
2025
2026                 total_recv += stats.ipackets;
2027                 total_xmit += stats.opackets;
2028                 total_rx_dropped += stats.imissed;
2029                 total_tx_dropped += ports_stats[pt_id].tx_dropped;
2030                 total_tx_dropped += stats.oerrors;
2031                 total_rx_nombuf  += stats.rx_nombuf;
2032
2033                 printf("\n  %s Forward statistics for port %-2d %s\n",
2034                        fwd_stats_border, pt_id, fwd_stats_border);
2035
2036                 printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2037                        "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2038                        stats.ipackets + stats.imissed);
2039
2040                 if (cur_fwd_eng == &csum_fwd_engine) {
2041                         printf("  Bad-ipcsum: %-14"PRIu64
2042                                " Bad-l4csum: %-14"PRIu64
2043                                "Bad-outer-l4csum: %-14"PRIu64"\n",
2044                                ports_stats[pt_id].rx_bad_ip_csum,
2045                                ports_stats[pt_id].rx_bad_l4_csum,
2046                                ports_stats[pt_id].rx_bad_outer_l4_csum);
2047                         printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2048                                ports_stats[pt_id].rx_bad_outer_ip_csum);
2049                 }
2050                 if (stats.ierrors + stats.rx_nombuf > 0) {
2051                         printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2052                         printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2053                 }
2054
2055                 printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2056                        "TX-total: %-"PRIu64"\n",
2057                        stats.opackets, ports_stats[pt_id].tx_dropped,
2058                        stats.opackets + ports_stats[pt_id].tx_dropped);
2059
2060                 if (record_burst_stats) {
2061                         if (ports_stats[pt_id].rx_stream)
2062                                 pkt_burst_stats_display("RX",
2063                                         &ports_stats[pt_id].rx_stream->rx_burst_stats);
2064                         if (ports_stats[pt_id].tx_stream)
2065                                 pkt_burst_stats_display("TX",
2066                                 &ports_stats[pt_id].tx_stream->tx_burst_stats);
2067                 }
2068
2069                 printf("  %s--------------------------------%s\n",
2070                        fwd_stats_border, fwd_stats_border);
2071         }
2072
2073         printf("\n  %s Accumulated forward statistics for all ports"
2074                "%s\n",
2075                acc_stats_border, acc_stats_border);
2076         printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2077                "%-"PRIu64"\n"
2078                "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2079                "%-"PRIu64"\n",
2080                total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2081                total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2082         if (total_rx_nombuf > 0)
2083                 printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2084         printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2085                "%s\n",
2086                acc_stats_border, acc_stats_border);
2087         if (record_core_cycles) {
2088 #define CYC_PER_MHZ 1E6
2089                 if (total_recv > 0 || total_xmit > 0) {
2090                         uint64_t total_pkts = 0;
2091                         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2092                             strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2093                                 total_pkts = total_xmit;
2094                         else
2095                                 total_pkts = total_recv;
2096
2097                         printf("\n  CPU cycles/packet=%.2F (total cycles="
2098                                "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2099                                " MHz Clock\n",
2100                                (double) fwd_cycles / total_pkts,
2101                                fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2102                                (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2103                 }
2104         }
2105 }
2106
2107 void
2108 fwd_stats_reset(void)
2109 {
2110         streamid_t sm_id;
2111         portid_t pt_id;
2112         int i;
2113
2114         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2115                 pt_id = fwd_ports_ids[i];
2116                 rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2117         }
2118         for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2119                 struct fwd_stream *fs = fwd_streams[sm_id];
2120
2121                 fs->rx_packets = 0;
2122                 fs->tx_packets = 0;
2123                 fs->fwd_dropped = 0;
2124                 fs->rx_bad_ip_csum = 0;
2125                 fs->rx_bad_l4_csum = 0;
2126                 fs->rx_bad_outer_l4_csum = 0;
2127                 fs->rx_bad_outer_ip_csum = 0;
2128
2129                 memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2130                 memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2131                 fs->core_cycles = 0;
2132         }
2133 }
2134
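/*
 * Drain any packets left in the forwarding Rx queues before a new run,
 * so stale mbufs do not pollute the statistics: two passes separated
 * by a 10 ms delay, each queue flush bounded by a 1-second timer.
 */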
2135 static void
2136 flush_fwd_rx_queues(void)
2137 {
2138         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2139         portid_t  rxp;
2140         portid_t port_id;
2141         queueid_t rxq;
2142         uint16_t  nb_rx;
2143         uint16_t  i;
2144         uint8_t   j;
2145         uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2146         uint64_t timer_period;
2147
2148         if (num_procs > 1) {
2149                 printf("multi-process does not support flushing fwd Rx queues, skipping\n");
2150                 return;
2151         }
2152
2153         /* convert to number of cycles */
2154         timer_period = rte_get_timer_hz(); /* 1 second timeout */
2155
2156         for (j = 0; j < 2; j++) {
2157                 for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2158                         for (rxq = 0; rxq < nb_rxq; rxq++) {
2159                                 port_id = fwd_ports_ids[rxp];
2160                                 /*
2161                                  * testpmd can get stuck in this do/while
2162                                  * loop if rte_eth_rx_burst() keeps
2163                                  * returning packets, so a 1-second timer
2164                                  * bounds the flush of each queue.
2165                                  */
2166                                 prev_tsc = rte_rdtsc();
2167                                 do {
2168                                         nb_rx = rte_eth_rx_burst(port_id, rxq,
2169                                                 pkts_burst, MAX_PKT_BURST);
2170                                         for (i = 0; i < nb_rx; i++)
2171                                                 rte_pktmbuf_free(pkts_burst[i]);
2172
2173                                         cur_tsc = rte_rdtsc();
2174                                         diff_tsc = cur_tsc - prev_tsc;
2175                                         timer_tsc += diff_tsc;
2176                                 } while ((nb_rx > 0) &&
2177                                         (timer_tsc < timer_period));
2178                                 timer_tsc = 0;
2179                         }
2180                 }
2181                 rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2182         }
2183 }
2184
2185 static void
2186 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2187 {
2188         struct fwd_stream **fsm;
2189         streamid_t nb_fs;
2190         streamid_t sm_id;
2191 #ifdef RTE_LIB_BITRATESTATS
2192         uint64_t tics_per_1sec;
2193         uint64_t tics_datum;
2194         uint64_t tics_current;
2195         uint16_t i, cnt_ports;
2196
2197         cnt_ports = nb_ports;
2198         tics_datum = rte_rdtsc();
2199         tics_per_1sec = rte_get_timer_hz();
2200 #endif
2201         fsm = &fwd_streams[fc->stream_idx];
2202         nb_fs = fc->stream_nb;
2203         do {
2204                 for (sm_id = 0; sm_id < nb_fs; sm_id++)
2205                         (*pkt_fwd)(fsm[sm_id]);
2206 #ifdef RTE_LIB_BITRATESTATS
2207                 if (bitrate_enabled != 0 &&
2208                                 bitrate_lcore_id == rte_lcore_id()) {
2209                         tics_current = rte_rdtsc();
2210                         if (tics_current - tics_datum >= tics_per_1sec) {
2211                                 /* Periodic bitrate calculation */
2212                                 for (i = 0; i < cnt_ports; i++)
2213                                         rte_stats_bitrate_calc(bitrate_data,
2214                                                 ports_ids[i]);
2215                                 tics_datum = tics_current;
2216                         }
2217                 }
2218 #endif
2219 #ifdef RTE_LIB_LATENCYSTATS
2220                 if (latencystats_enabled != 0 &&
2221                                 latencystats_lcore_id == rte_lcore_id())
2222                         rte_latencystats_update();
2223 #endif
2224
2225         } while (!fc->stopped);
2226 }
2227
2228 static int
2229 start_pkt_forward_on_core(void *fwd_arg)
2230 {
2231         run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2232                              cur_fwd_config.fwd_eng->packet_fwd);
2233         return 0;
2234 }
2235
2236 /*
2237  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2238  * Used to start communication flows in network loopback test configurations.
2239  */
2240 static int
2241 run_one_txonly_burst_on_core(void *fwd_arg)
2242 {
2243         struct fwd_lcore *fwd_lc;
2244         struct fwd_lcore tmp_lcore;
2245
2246         fwd_lc = (struct fwd_lcore *) fwd_arg;
2247         tmp_lcore = *fwd_lc;
2248         tmp_lcore.stopped = 1;
2249         run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2250         return 0;
2251 }
2252
2253 /*
2254  * Launch packet forwarding:
2255  *     - Setup per-port forwarding context.
2256  *     - launch logical cores with their forwarding configuration.
2257  */
2258 static void
2259 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2260 {
2261         unsigned int i;
2262         unsigned int lc_id;
2263         int diag;
2264
2265         for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2266                 lc_id = fwd_lcores_cpuids[i];
2267                 if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2268                         fwd_lcores[i]->stopped = 0;
2269                         diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2270                                                      fwd_lcores[i], lc_id);
2271                         if (diag != 0)
2272                                 fprintf(stderr,
2273                                         "launch lcore %u failed - diag=%d\n",
2274                                         lc_id, diag);
2275                 }
2276         }
2277 }
2278
2279 /*
2280  * Launch packet forwarding configuration.
2281  */
2282 void
2283 start_packet_forwarding(int with_tx_first)
2284 {
2285         port_fwd_begin_t port_fwd_begin;
2286         port_fwd_end_t  port_fwd_end;
2287         unsigned int i;
2288
2289         if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2290                 rte_exit(EXIT_FAILURE, "rxq is 0, cannot use rxonly fwd mode\n");
2291
2292         if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2293                 rte_exit(EXIT_FAILURE, "txq is 0, cannot use txonly fwd mode\n");
2294
2295         if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2296                 strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2297                 (!nb_rxq || !nb_txq))
2298                 rte_exit(EXIT_FAILURE,
2299                         "Either rxq or txq is 0, cannot use %s fwd mode\n",
2300                         cur_fwd_eng->fwd_mode_name);
2301
2302         if (all_ports_started() == 0) {
2303                 fprintf(stderr, "Not all ports were started\n");
2304                 return;
2305         }
2306         if (test_done == 0) {
2307                 fprintf(stderr, "Packet forwarding already started\n");
2308                 return;
2309         }
2310
2311         fwd_config_setup();
2312
2313         port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2314         if (port_fwd_begin != NULL) {
2315                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2316                         if (port_fwd_begin(fwd_ports_ids[i])) {
2317                                 fprintf(stderr,
2318                                         "Packet forwarding is not ready\n");
2319                                 return;
2320                         }
2321                 }
2322         }
2323
2324         if (with_tx_first) {
2325                 port_fwd_begin = tx_only_engine.port_fwd_begin;
2326                 if (port_fwd_begin != NULL) {
2327                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2328                                 if (port_fwd_begin(fwd_ports_ids[i])) {
2329                                         fprintf(stderr,
2330                                                 "Packet forwarding is not ready\n");
2331                                         return;
2332                                 }
2333                         }
2334                 }
2335         }
2336
2337         test_done = 0;
2338
2339         if (!no_flush_rx)
2340                 flush_fwd_rx_queues();
2341
2342         pkt_fwd_config_display(&cur_fwd_config);
2343         rxtx_config_display();
2344
2345         fwd_stats_reset();
2346         if (with_tx_first) {
2347                 while (with_tx_first--) {
2348                         launch_packet_forwarding(
2349                                         run_one_txonly_burst_on_core);
2350                         rte_eal_mp_wait_lcore();
2351                 }
2352                 port_fwd_end = tx_only_engine.port_fwd_end;
2353                 if (port_fwd_end != NULL) {
2354                         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2355                                 (*port_fwd_end)(fwd_ports_ids[i]);
2356                 }
2357         }
2358         launch_packet_forwarding(start_pkt_forward_on_core);
2359 }
2360
2361 void
2362 stop_packet_forwarding(void)
2363 {
2364         port_fwd_end_t port_fwd_end;
2365         lcoreid_t lc_id;
2366         portid_t pt_id;
2367         int i;
2368
2369         if (test_done) {
2370                 fprintf(stderr, "Packet forwarding not started\n");
2371                 return;
2372         }
2373         printf("Telling cores to stop...");
2374         for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2375                 fwd_lcores[lc_id]->stopped = 1;
2376         printf("\nWaiting for lcores to finish...\n");
2377         rte_eal_mp_wait_lcore();
2378         port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2379         if (port_fwd_end != NULL) {
2380                 for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2381                         pt_id = fwd_ports_ids[i];
2382                         (*port_fwd_end)(pt_id);
2383                 }
2384         }
2385
2386         fwd_stats_display();
2387
2388         printf("\nDone.\n");
2389         test_done = 1;
2390 }
2391
2392 void
2393 dev_set_link_up(portid_t pid)
2394 {
2395         if (rte_eth_dev_set_link_up(pid) < 0)
2396                 fprintf(stderr, "\nSet link up fail.\n");
2397 }
2398
2399 void
2400 dev_set_link_down(portid_t pid)
2401 {
2402         if (rte_eth_dev_set_link_down(pid) < 0)
2403                 fprintf(stderr, "\nSet link down fail.\n");
2404 }
2405
2406 static int
2407 all_ports_started(void)
2408 {
2409         portid_t pi;
2410         struct rte_port *port;
2411
2412         RTE_ETH_FOREACH_DEV(pi) {
2413                 port = &ports[pi];
2414                 /* Check if there is a port which is not started */
2415                 if ((port->port_status != RTE_PORT_STARTED) &&
2416                         (port->slave_flag == 0))
2417                         return 0;
2418         }
2419
2420         /* All ports are started */
2421         return 1;
2422 }
2423
2424 int
2425 port_is_stopped(portid_t port_id)
2426 {
2427         struct rte_port *port = &ports[port_id];
2428
2429         if ((port->port_status != RTE_PORT_STOPPED) &&
2430             (port->slave_flag == 0))
2431                 return 0;
2432         return 1;
2433 }
2434
2435 int
2436 all_ports_stopped(void)
2437 {
2438         portid_t pi;
2439
2440         RTE_ETH_FOREACH_DEV(pi) {
2441                 if (!port_is_stopped(pi))
2442                         return 0;
2443         }
2444
2445         return 1;
2446 }
2447
2448 int
2449 port_is_started(portid_t port_id)
2450 {
2451         if (port_id_is_invalid(port_id, ENABLED_WARN))
2452                 return 0;
2453
2454         if (ports[port_id].port_status != RTE_PORT_STARTED)
2455                 return 0;
2456
2457         return 1;
2458 }
2459
2460 /* Configure the Rx and Tx hairpin queues for the selected port. */
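/*
 * hairpin_mode bits, as decoded below: with the low nibble clear each
 * port is hairpinned to itself; 0x1 chains each port's Tx to the next
 * port's Rx (the last port wrapping to the first); 0x2 binds ports in
 * pairs; 0x10 selects explicit Tx flow mode.
 */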
2461 static int
2462 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2463 {
2464         queueid_t qi;
2465         struct rte_eth_hairpin_conf hairpin_conf = {
2466                 .peer_count = 1,
2467         };
2468         int i;
2469         int diag;
2470         struct rte_port *port = &ports[pi];
2471         uint16_t peer_rx_port = pi;
2472         uint16_t peer_tx_port = pi;
2473         uint32_t manual = 1;
2474         uint32_t tx_exp = hairpin_mode & 0x10;
2475
2476         if (!(hairpin_mode & 0xf)) {
2477                 peer_rx_port = pi;
2478                 peer_tx_port = pi;
2479                 manual = 0;
2480         } else if (hairpin_mode & 0x1) {
2481                 peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2482                                                        RTE_ETH_DEV_NO_OWNER);
2483                 if (peer_tx_port >= RTE_MAX_ETHPORTS)
2484                         peer_tx_port = rte_eth_find_next_owned_by(0,
2485                                                 RTE_ETH_DEV_NO_OWNER);
2486                 if (p_pi != RTE_MAX_ETHPORTS) {
2487                         peer_rx_port = p_pi;
2488                 } else {
2489                         uint16_t next_pi;
2490
2491                         /* Last port will be the peer RX port of the first. */
2492                         RTE_ETH_FOREACH_DEV(next_pi)
2493                                 peer_rx_port = next_pi;
2494                 }
2495                 manual = 1;
2496         } else if (hairpin_mode & 0x2) {
2497                 if (cnt_pi & 0x1) {
2498                         peer_rx_port = p_pi;
2499                 } else {
2500                         peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2501                                                 RTE_ETH_DEV_NO_OWNER);
2502                         if (peer_rx_port >= RTE_MAX_ETHPORTS)
2503                                 peer_rx_port = pi;
2504                 }
2505                 peer_tx_port = peer_rx_port;
2506                 manual = 1;
2507         }
2508
2509         for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2510                 hairpin_conf.peers[0].port = peer_rx_port;
2511                 hairpin_conf.peers[0].queue = i + nb_rxq;
2512                 hairpin_conf.manual_bind = !!manual;
2513                 hairpin_conf.tx_explicit = !!tx_exp;
2514                 diag = rte_eth_tx_hairpin_queue_setup
2515                         (pi, qi, nb_txd, &hairpin_conf);
2516                 i++;
2517                 if (diag == 0)
2518                         continue;
2519
2520                 /* Failed to set up Tx hairpin queue, return */
2521                 if (rte_atomic16_cmpset(&(port->port_status),
2522                                         RTE_PORT_HANDLING,
2523                                         RTE_PORT_STOPPED) == 0)
2524                         fprintf(stderr,
2525                                 "Port %d can not be set back to stopped\n", pi);
2526                 fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2527                         pi);
2528                 /* try to reconfigure queues next time */
2529                 port->need_reconfig_queues = 1;
2530                 return -1;
2531         }
2532         for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2533                 hairpin_conf.peers[0].port = peer_tx_port;
2534                 hairpin_conf.peers[0].queue = i + nb_txq;
2535                 hairpin_conf.manual_bind = !!manual;
2536                 hairpin_conf.tx_explicit = !!tx_exp;
2537                 diag = rte_eth_rx_hairpin_queue_setup
2538                         (pi, qi, nb_rxd, &hairpin_conf);
2539                 i++;
2540                 if (diag == 0)
2541                         continue;
2542
2543                 /* Failed to set up Rx hairpin queue, return */
2544                 if (rte_atomic16_cmpset(&(port->port_status),
2545                                         RTE_PORT_HANDLING,
2546                                         RTE_PORT_STOPPED) == 0)
2547                         fprintf(stderr,
2548                                 "Port %d can not be set back to stopped\n", pi);
2549                 fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2550                         pi);
2551                 /* try to reconfigure queues next time */
2552                 port->need_reconfig_queues = 1;
2553                 return -1;
2554         }
2555         return 0;
2556 }
2557
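/*
 * Buffer split example (hypothetical CLI values): "set rxpkts 64,1454"
 * gives rx_pkt_nb_segs = 2, so each packet is received into a 64-byte
 * first segment plus the remainder, every segment drawn from the
 * matching per-socket mempool when one exists.
 */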
2558 /* Configure the Rx with optional split. */
2559 int
2560 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2561                uint16_t nb_rx_desc, unsigned int socket_id,
2562                struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2563 {
2564         union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2565         unsigned int i, mp_n;
2566         int ret;
2567
2568         if (rx_pkt_nb_segs <= 1 ||
2569             (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2570                 rx_conf->rx_seg = NULL;
2571                 rx_conf->rx_nseg = 0;
2572                 ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2573                                              nb_rx_desc, socket_id,
2574                                              rx_conf, mp);
2575                 return ret;
2576         }
2577         for (i = 0; i < rx_pkt_nb_segs; i++) {
2578                 struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2579                 struct rte_mempool *mpx;
2580                 /*
2581                  * Use the last valid pool for segments whose index
2582                  * exceeds the number of configured pools.
2583                  */
2584                 mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2585                 mpx = mbuf_pool_find(socket_id, mp_n);
2586                 /* Handle zero as mbuf data buffer size. */
2587                 rx_seg->length = rx_pkt_seg_lengths[i] ?
2588                                    rx_pkt_seg_lengths[i] :
2589                                    mbuf_data_size[mp_n];
2590                 rx_seg->offset = i < rx_pkt_nb_offs ?
2591                                    rx_pkt_seg_offsets[i] : 0;
2592                 rx_seg->mp = mpx ? mpx : mp;
2593         }
2594         rx_conf->rx_nseg = rx_pkt_nb_segs;
2595         rx_conf->rx_seg = rx_useg;
2596         ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2597                                     socket_id, rx_conf, NULL);
2598         rx_conf->rx_seg = NULL;
2599         rx_conf->rx_nseg = 0;
2600         return ret;
2601 }
2602
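/*
 * Allocate the ids/prev/curr arrays backing the xstats display. On a
 * partial failure the goto ladder below unwinds only what was already
 * allocated and returns -ENOMEM.
 */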
2603 static int
2604 alloc_xstats_display_info(portid_t pi)
2605 {
2606         uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2607         uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2608         uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2609
2610         if (xstats_display_num == 0)
2611                 return 0;
2612
2613         *ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2614         if (*ids_supp == NULL)
2615                 goto fail_ids_supp;
2616
2617         *prev_values = calloc(xstats_display_num,
2618                               sizeof(**prev_values));
2619         if (*prev_values == NULL)
2620                 goto fail_prev_values;
2621
2622         *curr_values = calloc(xstats_display_num,
2623                               sizeof(**curr_values));
2624         if (*curr_values == NULL)
2625                 goto fail_curr_values;
2626
2627         ports[pi].xstats_info.allocated = true;
2628
2629         return 0;
2630
2631 fail_curr_values:
2632         free(*prev_values);
2633 fail_prev_values:
2634         free(*ids_supp);
2635 fail_ids_supp:
2636         return -ENOMEM;
2637 }
2638
2639 static void
2640 free_xstats_display_info(portid_t pi)
2641 {
2642         if (!ports[pi].xstats_info.allocated)
2643                 return;
2644         free(ports[pi].xstats_info.ids_supp);
2645         free(ports[pi].xstats_info.prev_values);
2646         free(ports[pi].xstats_info.curr_values);
2647         ports[pi].xstats_info.allocated = false;
2648 }
2649
2650 /** Fill helper structures for specified port to show extended statistics. */
2651 static void
2652 fill_xstats_display_info_for_port(portid_t pi)
2653 {
2654         unsigned int stat, stat_supp;
2655         const char *xstat_name;
2656         struct rte_port *port;
2657         uint64_t *ids_supp;
2658         int rc;
2659
2660         if (xstats_display_num == 0)
2661                 return;
2662
2663         if (pi == (portid_t)RTE_PORT_ALL) {
2664                 fill_xstats_display_info();
2665                 return;
2666         }
2667
2668         port = &ports[pi];
2669         if (port->port_status != RTE_PORT_STARTED)
2670                 return;
2671
2672         if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2673                 rte_exit(EXIT_FAILURE,
2674                          "Failed to allocate xstats display memory\n");
2675
2676         ids_supp = port->xstats_info.ids_supp;
2677         for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2678                 xstat_name = xstats_display[stat].name;
2679                 rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2680                                                    ids_supp + stat_supp);
2681                 if (rc != 0) {
2682                         fprintf(stderr, "No xstat '%s' on port %u, skipping it (index %u)\n",
2683                                 xstat_name, pi, stat);
2684                         continue;
2685                 }
2686                 stat_supp++;
2687         }
2688
2689         port->xstats_info.ids_supp_sz = stat_supp;
2690 }
2691
2692 /** Fill helper structures for all ports to show extended statistics. */
2693 static void
2694 fill_xstats_display_info(void)
2695 {
2696         portid_t pi;
2697
2698         if (xstats_display_num == 0)
2699                 return;
2700
2701         RTE_ETH_FOREACH_DEV(pi)
2702                 fill_xstats_display_info_for_port(pi);
2703 }
2704
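/*
 * Start one port, or all of them with RTE_PORT_ALL: reconfigure the
 * device and its queues when flagged, set up the hairpin queues, start
 * the device, move the port state to STARTED and finally, in hairpin
 * manual-bind modes, bind each started port to its peers.
 */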
2705 int
2706 start_port(portid_t pid)
2707 {
2708         int diag, need_check_link_status = -1;
2709         portid_t pi;
2710         portid_t p_pi = RTE_MAX_ETHPORTS;
2711         portid_t pl[RTE_MAX_ETHPORTS];
2712         portid_t peer_pl[RTE_MAX_ETHPORTS];
2713         uint16_t cnt_pi = 0;
2714         uint16_t cfg_pi = 0;
2715         int peer_pi;
2716         queueid_t qi;
2717         struct rte_port *port;
2718         struct rte_eth_hairpin_cap cap;
2719
2720         if (port_id_is_invalid(pid, ENABLED_WARN))
2721                 return 0;
2722
2723         RTE_ETH_FOREACH_DEV(pi) {
2724                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2725                         continue;
2726
2727                 need_check_link_status = 0;
2728                 port = &ports[pi];
2729                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2730                                                  RTE_PORT_HANDLING) == 0) {
2731                         fprintf(stderr, "Port %d is now not stopped\n", pi);
2732                         continue;
2733                 }
2734
2735                 if (port->need_reconfig > 0) {
2736                         struct rte_eth_conf dev_conf;
2737                         int k;
2738
2739                         port->need_reconfig = 0;
2740
2741                         if (flow_isolate_all) {
2742                                 int ret = port_flow_isolate(pi, 1);
2743                                 if (ret) {
2744                                         fprintf(stderr,
2745                                                 "Failed to apply isolated mode on port %d\n",
2746                                                 pi);
2747                                         return -1;
2748                                 }
2749                         }
2750                         configure_rxtx_dump_callbacks(0);
2751                         printf("Configuring Port %d (socket %u)\n", pi,
2752                                         port->socket_id);
2753                         if (nb_hairpinq > 0 &&
2754                             rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2755                                 fprintf(stderr,
2756                                         "Port %d doesn't support hairpin queues\n",
2757                                         pi);
2758                                 return -1;
2759                         }
2760
2761                         /* configure port */
2762                         diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2763                                                      nb_txq + nb_hairpinq,
2764                                                      &(port->dev_conf));
2765                         if (diag != 0) {
2766                                 if (rte_atomic16_cmpset(&(port->port_status),
2767                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2768                                         fprintf(stderr,
2769                                                 "Port %d can not be set back to stopped\n",
2770                                                 pi);
2771                                 fprintf(stderr, "Fail to configure port %d\n",
2772                                         pi);
2773                                 /* try to reconfigure port next time */
2774                                 port->need_reconfig = 1;
2775                                 return -1;
2776                         }
2777                         /* get device configuration */
2778                         if (eth_dev_conf_get_print_err(pi, &dev_conf) != 0) {
2780                                 fprintf(stderr,
2781                                         "port %d can not get device configuration\n",
2782                                         pi);
2783                                 return -1;
2784                         }
2785                         /* Apply Rx offloads configuration */
2786                         if (dev_conf.rxmode.offloads !=
2787                             port->dev_conf.rxmode.offloads) {
2788                                 port->dev_conf.rxmode.offloads |=
2789                                         dev_conf.rxmode.offloads;
2790                                 for (k = 0;
2791                                      k < port->dev_info.max_rx_queues;
2792                                      k++)
2793                                         port->rx_conf[k].offloads |=
2794                                                 dev_conf.rxmode.offloads;
2795                         }
2796                         /* Apply Tx offloads configuration */
2797                         if (dev_conf.txmode.offloads !=
2798                             port->dev_conf.txmode.offloads) {
2799                                 port->dev_conf.txmode.offloads |=
2800                                         dev_conf.txmode.offloads;
2801                                 for (k = 0;
2802                                      k < port->dev_info.max_tx_queues;
2803                                      k++)
2804                                         port->tx_conf[k].offloads |=
2805                                                 dev_conf.txmode.offloads;
2806                         }
2807                 }
2808                 if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2809                         port->need_reconfig_queues = 0;
2810                         /* setup tx queues */
2811                         for (qi = 0; qi < nb_txq; qi++) {
2812                                 if ((numa_support) &&
2813                                         (txring_numa[pi] != NUMA_NO_CONFIG))
2814                                         diag = rte_eth_tx_queue_setup(pi, qi,
2815                                                 port->nb_tx_desc[qi],
2816                                                 txring_numa[pi],
2817                                                 &(port->tx_conf[qi]));
2818                                 else
2819                                         diag = rte_eth_tx_queue_setup(pi, qi,
2820                                                 port->nb_tx_desc[qi],
2821                                                 port->socket_id,
2822                                                 &(port->tx_conf[qi]));
2823
2824                                 if (diag == 0)
2825                                         continue;
2826
2827                                 /* Failed to set up Tx queue, return */
2828                                 if (rte_atomic16_cmpset(&(port->port_status),
2829                                                         RTE_PORT_HANDLING,
2830                                                         RTE_PORT_STOPPED) == 0)
2831                                         fprintf(stderr,
2832                                                 "Port %d can not be set back to stopped\n",
2833                                                 pi);
2834                                 fprintf(stderr,
2835                                         "Fail to configure port %d tx queues\n",
2836                                         pi);
2837                                 /* try to reconfigure queues next time */
2838                                 port->need_reconfig_queues = 1;
2839                                 return -1;
2840                         }
2841                         for (qi = 0; qi < nb_rxq; qi++) {
2842                                 /* setup rx queues */
2843                                 if ((numa_support) &&
2844                                         (rxring_numa[pi] != NUMA_NO_CONFIG)) {
2845                                         struct rte_mempool *mp =
2846                                                 mbuf_pool_find
2847                                                         (rxring_numa[pi], 0);
2848                                         if (mp == NULL) {
2849                                                 fprintf(stderr,
2850                                                         "Failed to setup RX queue: No mempool allocation on the socket %d\n",
2851                                                         rxring_numa[pi]);
2852                                                 return -1;
2853                                         }
2854
2855                                         diag = rx_queue_setup(pi, qi,
2856                                              port->nb_rx_desc[qi],
2857                                              rxring_numa[pi],
2858                                              &(port->rx_conf[qi]),
2859                                              mp);
2860                                 } else {
2861                                         struct rte_mempool *mp =
2862                                                 mbuf_pool_find
2863                                                         (port->socket_id, 0);
2864                                         if (mp == NULL) {
2865                                                 fprintf(stderr,
2866                                                         "Failed to setup RX queue: No mempool allocation on the socket %d\n",
2867                                                         port->socket_id);
2868                                                 return -1;
2869                                         }
2870                                         diag = rx_queue_setup(pi, qi,
2871                                              port->nb_rx_desc[qi],
2872                                              port->socket_id,
2873                                              &(port->rx_conf[qi]),
2874                                              mp);
2875                                 }
2876                                 if (diag == 0)
2877                                         continue;
2878
2879                                 /* Failed to set up Rx queue, return */
2880                                 if (rte_atomic16_cmpset(&(port->port_status),
2881                                                         RTE_PORT_HANDLING,
2882                                                         RTE_PORT_STOPPED) == 0)
2883                                         fprintf(stderr,
2884                                                 "Port %d can not be set back to stopped\n",
2885                                                 pi);
2886                                 fprintf(stderr,
2887                                         "Fail to configure port %d rx queues\n",
2888                                         pi);
2889                                 /* try to reconfigure queues next time */
2890                                 port->need_reconfig_queues = 1;
2891                                 return -1;
2892                         }
2893                         /* setup hairpin queues */
2894                         if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2895                                 return -1;
2896                 }
2897                 configure_rxtx_dump_callbacks(verbose_level);
2898                 if (clear_ptypes) {
2899                         diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2900                                         NULL, 0);
2901                         if (diag < 0)
2902                                 fprintf(stderr,
2903                                         "Port %d: Failed to disable Ptype parsing\n",
2904                                         pi);
2905                 }
2906
2907                 p_pi = pi;
2908                 cnt_pi++;
2909
2910                 /* start port */
2911                 diag = eth_dev_start_mp(pi);
2912                 if (diag < 0) {
2913                         fprintf(stderr, "Failed to start port %d: %s\n",
2914                                 pi, rte_strerror(-diag));
2915
2916                         /* Failed to start port, set status back to stopped. */
2917                         if (rte_atomic16_cmpset(&(port->port_status),
2918                                 RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2919                                 fprintf(stderr,
2920                                         "Port %d cannot be set back to stopped\n",
2921                                         pi);
2922                         continue;
2923                 }
2924
2925                 if (rte_atomic16_cmpset(&(port->port_status),
2926                         RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2927                         fprintf(stderr, "Port %d cannot be set to started\n",
2928                                 pi);
2929
2930                 if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2931                         printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2932                                         RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2933
2934                 /* At least one port started, need to check link status. */
2935                 need_check_link_status = 1;
2936
2937                 pl[cfg_pi++] = pi;
2938         }
2939
2940         if (need_check_link_status == 1 && !no_link_check)
2941                 check_all_ports_link_status(RTE_PORT_ALL);
2942         else if (need_check_link_status == 0)
2943                 fprintf(stderr, "Please stop the ports first\n");
2944
2945         if (hairpin_mode & 0xf) {
2946                 uint16_t i;
2947                 int j;
2948
2949                 /* bind all started hairpin ports */
2950                 for (i = 0; i < cfg_pi; i++) {
2951                         pi = pl[i];
2952                         /* bind current Tx to all peer Rx */
2953                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2954                                                         RTE_MAX_ETHPORTS, 1);
2955                         if (peer_pi < 0)
2956                                 return peer_pi;
2957                         for (j = 0; j < peer_pi; j++) {
2958                                 if (!port_is_started(peer_pl[j]))
2959                                         continue;
2960                                 diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2961                                 if (diag < 0) {
2962                                         fprintf(stderr,
2963                                                 "Error during binding hairpin Tx port %u to %u: %s\n",
2964                                                 pi, peer_pl[j],
2965                                                 rte_strerror(-diag));
2966                                         return -1;
2967                                 }
2968                         }
2969                         /* bind all peer Tx to current Rx */
2970                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2971                                                         RTE_MAX_ETHPORTS, 0);
2972                         if (peer_pi < 0)
2973                                 return peer_pi;
2974                         for (j = 0; j < peer_pi; j++) {
2975                                 if (!port_is_started(peer_pl[j]))
2976                                         continue;
2977                                 diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2978                                 if (diag < 0) {
2979                                         fprintf(stderr,
2980                                                 "Error during binding hairpin Tx port %u to %u: %s\n",
2981                                                 peer_pl[j], pi,
2982                                                 rte_strerror(-diag));
2983                                         return -1;
2984                                 }
2985                         }
2986                 }
2987         }
2988
2989         fill_xstats_display_info_for_port(pid);
2990
2991         printf("Done\n");
2992         return 0;
2993 }
2994
2995 void
2996 stop_port(portid_t pid)
2997 {
2998         portid_t pi;
2999         struct rte_port *port;
3000         int need_check_link_status = 0;
3001         portid_t peer_pl[RTE_MAX_ETHPORTS];
3002         int peer_pi;
3003
3004         if (port_id_is_invalid(pid, ENABLED_WARN))
3005                 return;
3006
3007         printf("Stopping ports...\n");
3008
3009         RTE_ETH_FOREACH_DEV(pi) {
3010                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3011                         continue;
3012
3013                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3014                         fprintf(stderr,
3015                                 "Please remove port %d from forwarding configuration.\n",
3016                                 pi);
3017                         continue;
3018                 }
3019
3020                 if (port_is_bonding_slave(pi)) {
3021                         fprintf(stderr,
3022                                 "Please remove port %d from bonded device.\n",
3023                                 pi);
3024                         continue;
3025                 }
3026
3027                 port = &ports[pi];
3028                 if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
3029                                                 RTE_PORT_HANDLING) == 0)
3030                         continue;
3031
3032                 if (hairpin_mode & 0xf) {
3033                         int j;
3034
3035                         rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3036                         /* unbind all peer Tx from current Rx */
3037                         peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3038                                                         RTE_MAX_ETHPORTS, 0);
3039                         if (peer_pi < 0)
3040                                 continue;
3041                         for (j = 0; j < peer_pi; j++) {
3042                                 if (!port_is_started(peer_pl[j]))
3043                                         continue;
3044                                 rte_eth_hairpin_unbind(peer_pl[j], pi);
3045                         }
3046                 }
3047
3048                 if (port->flow_list)
3049                         port_flow_flush(pi);
3050
3051                 if (eth_dev_stop_mp(pi) != 0)
3052                         RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3053                                 pi);
3054
3055                 if (rte_atomic16_cmpset(&(port->port_status),
3056                         RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
3057                         fprintf(stderr, "Port %d cannot be set to stopped\n",
3058                                 pi);
3059                 need_check_link_status = 1;
3060         }
3061         if (need_check_link_status && !no_link_check)
3062                 check_all_ports_link_status(RTE_PORT_ALL);
3063
3064         printf("Done\n");
3065 }
3066
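/*
 * Compact the given port array in place, keeping only entries that are
 * still valid port ids, and update the element count accordingly.
 */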
3067 static void
3068 remove_invalid_ports_in(portid_t *array, portid_t *total)
3069 {
3070         portid_t i;
3071         portid_t new_total = 0;
3072
3073         for (i = 0; i < *total; i++)
3074                 if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3075                         array[new_total] = array[i];
3076                         new_total++;
3077                 }
3078         *total = new_total;
3079 }
3080
3081 static void
3082 remove_invalid_ports(void)
3083 {
3084         remove_invalid_ports_in(ports_ids, &nb_ports);
3085         remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3086         nb_cfg_ports = nb_fwd_ports;
3087 }
3088
3089 void
3090 close_port(portid_t pid)
3091 {
3092         portid_t pi;
3093         struct rte_port *port;
3094
3095         if (port_id_is_invalid(pid, ENABLED_WARN))
3096                 return;
3097
3098         printf("Closing ports...\n");
3099
3100         RTE_ETH_FOREACH_DEV(pi) {
3101                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3102                         continue;
3103
3104                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3105                         fprintf(stderr,
3106                                 "Please remove port %d from forwarding configuration.\n",
3107                                 pi);
3108                         continue;
3109                 }
3110
3111                 if (port_is_bonding_slave(pi)) {
3112                         fprintf(stderr,
3113                                 "Please remove port %d from bonded device.\n",
3114                                 pi);
3115                         continue;
3116                 }
3117
3118                 port = &ports[pi];
3119                 if (rte_atomic16_cmpset(&(port->port_status),
3120                         RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
3121                         fprintf(stderr, "Port %d is already closed\n", pi);
3122                         continue;
3123                 }
3124
3125                 if (is_proc_primary()) {
3126                         port_flow_flush(pi);
3127                         rte_eth_dev_close(pi);
3128                 }
3129
3130                 free_xstats_display_info(pi);
3131         }
3132
3133         remove_invalid_ports();
3134         printf("Done\n");
3135 }
3136
3137 void
3138 reset_port(portid_t pid)
3139 {
3140         int diag;
3141         portid_t pi;
3142         struct rte_port *port;
3143
3144         if (port_id_is_invalid(pid, ENABLED_WARN))
3145                 return;
3146
3147         if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3148                 (pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3149                 fprintf(stderr,
3150                         "Can not reset port(s), please stop port(s) first.\n");
3151                 return;
3152         }
3153
3154         printf("Resetting ports...\n");
3155
3156         RTE_ETH_FOREACH_DEV(pi) {
3157                 if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3158                         continue;
3159
3160                 if (port_is_forwarding(pi) != 0 && test_done == 0) {
3161                         fprintf(stderr,
3162                                 "Please remove port %d from forwarding configuration.\n",
3163                                 pi);
3164                         continue;
3165                 }
3166
3167                 if (port_is_bonding_slave(pi)) {
3168                         fprintf(stderr,
3169                                 "Please remove port %d from bonded device.\n",
3170                                 pi);
3171                         continue;
3172                 }
3173
3174                 diag = rte_eth_dev_reset(pi);
3175                 if (diag == 0) {
3176                         port = &ports[pi];
3177                         port->need_reconfig = 1;
3178                         port->need_reconfig_queues = 1;
3179                 } else {
3180                         fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3181                                 pi, diag);
3182                 }
3183         }
3184
3185         printf("Done\n");
3186 }
3187
3188 void
3189 attach_port(char *identifier)
3190 {
3191         portid_t pi;
3192         struct rte_dev_iterator iterator;
3193
3194         printf("Attaching a new port...\n");
3195
3196         if (identifier == NULL) {
3197                 fprintf(stderr, "Invalid parameter specified\n");
3198                 return;
3199         }
3200
3201         if (rte_dev_probe(identifier) < 0) {
3202                 TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3203                 return;
3204         }
3205
3206         /* first attach mode: event */
3207         if (setup_on_probe_event) {
3208                 /* new ports are detected on RTE_ETH_EVENT_NEW event */
3209                 for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3210                         if (ports[pi].port_status == RTE_PORT_HANDLING &&
3211                                         ports[pi].need_setup != 0)
3212                                 setup_attached_port(pi);
3213                 return;
3214         }
3215
3216         /* second attach mode: iterator */
3217         RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3218                 /* setup ports matching the devargs used for probing */
3219                 if (port_is_forwarding(pi))
3220                         continue; /* port was already attached before */
3221                 setup_attached_port(pi);
3222         }
3223 }
3224
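/*
 * Configure a newly attached port: pick a valid NUMA socket, reconfigure
 * the port, enable promiscuous mode and add it to the forwarding config.
 */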
3225 static void
3226 setup_attached_port(portid_t pi)
3227 {
3228         unsigned int socket_id;
3229         int ret;
3230
3231         socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3232         /* if socket_id is invalid, set to the first available socket. */
3233         if (check_socket_id(socket_id) < 0)
3234                 socket_id = socket_ids[0];
3235         reconfig(pi, socket_id);
3236         ret = rte_eth_promiscuous_enable(pi);
3237         if (ret != 0)
3238                 fprintf(stderr,
3239                         "Error during enabling promiscuous mode for port %u: %s - ignore\n",
3240                         pi, rte_strerror(-ret));
3241
3242         ports_ids[nb_ports++] = pi;
3243         fwd_ports_ids[nb_fwd_ports++] = pi;
3244         nb_cfg_ports = nb_fwd_ports;
3245         ports[pi].need_setup = 0;
3246         ports[pi].port_status = RTE_PORT_STOPPED;
3247
3248         printf("Port %d is attached. Total number of ports is now %d\n", pi, nb_ports);
3249         printf("Done\n");
3250 }
3251
3252 static void
3253 detach_device(struct rte_device *dev)
3254 {
3255         portid_t sibling;
3256
3257         if (dev == NULL) {
3258                 fprintf(stderr, "Device already removed\n");
3259                 return;
3260         }
3261
3262         printf("Removing a device...\n");
3263
3264         RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3265                 if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3266                         if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3267                                 fprintf(stderr, "Port %u not stopped\n",
3268                                         sibling);
3269                                 return;
3270                         }
3271                         port_flow_flush(sibling);
3272                 }
3273         }
3274
3275         if (rte_dev_remove(dev) < 0) {
3276                 TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3277                 return;
3278         }
3279         remove_invalid_ports();
3280
3281         printf("Device is detached\n");
3282         printf("Total number of ports is now %d\n", nb_ports);
3283         printf("Done\n");
3285 }
3286
3287 void
3288 detach_port_device(portid_t port_id)
3289 {
3290         int ret;
3291         struct rte_eth_dev_info dev_info;
3292
3293         if (port_id_is_invalid(port_id, ENABLED_WARN))
3294                 return;
3295
3296         if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3297                 if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3298                         fprintf(stderr, "Port not stopped\n");
3299                         return;
3300                 }
3301                 fprintf(stderr, "Port was not closed\n");
3302         }
3303
3304         ret = eth_dev_info_get_print_err(port_id, &dev_info);
3305         if (ret != 0) {
3306                 TESTPMD_LOG(ERR,
3307                         "Failed to get device info for port %d, not detaching\n",
3308                         port_id);
3309                 return;
3310         }
3311         detach_device(dev_info.device);
3312 }
3313
3314 void
3315 detach_devargs(char *identifier)
3316 {
3317         struct rte_dev_iterator iterator;
3318         struct rte_devargs da;
3319         portid_t port_id;
3320
3321         printf("Removing a device...\n");
3322
3323         memset(&da, 0, sizeof(da));
3324         if (rte_devargs_parsef(&da, "%s", identifier)) {
3325                 fprintf(stderr, "Cannot parse identifier\n");
3326                 return;
3327         }
3328
3329         RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3330                 if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3331                         if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3332                                 fprintf(stderr, "Port %u not stopped\n",
3333                                         port_id);
3334                                 rte_eth_iterator_cleanup(&iterator);
3335                                 rte_devargs_reset(&da);
3336                                 return;
3337                         }
3338                         port_flow_flush(port_id);
3339                 }
3340         }
3341
3342         if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3343                 TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3344                             da.name, da.bus->name);
3345                 rte_devargs_reset(&da);
3346                 return;
3347         }
3348
3349         remove_invalid_ports();
3350
3351         printf("Device %s is detached\n", identifier);
3352         printf("Total number of ports is now %d\n", nb_ports);
3353         printf("Done\n");
3354         rte_devargs_reset(&da);
3355 }
3356
3357 void
3358 pmd_test_exit(void)
3359 {
3360         portid_t pt_id;
3361         unsigned int i;
3362         int ret;
3363
3364         if (test_done == 0)
3365                 stop_packet_forwarding();
3366
3367 #ifndef RTE_EXEC_ENV_WINDOWS
3368         for (i = 0; i < RTE_DIM(mempools); i++) {
3369                 if (mempools[i]) {
3370                         if (mp_alloc_type == MP_ALLOC_ANON)
3371                                 rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3372                                                      NULL);
3373                 }
3374         }
3375 #endif
3376         if (ports != NULL) {
3377                 no_link_check = 1;
3378                 RTE_ETH_FOREACH_DEV(pt_id) {
3379                         printf("\nStopping port %d...\n", pt_id);
3380                         fflush(stdout);
3381                         stop_port(pt_id);
3382                 }
3383                 RTE_ETH_FOREACH_DEV(pt_id) {
3384                         printf("\nShutting down port %d...\n", pt_id);
3385                         fflush(stdout);
3386                         close_port(pt_id);
3387                 }
3388         }
3389
3390         if (hot_plug) {
3391                 ret = rte_dev_event_monitor_stop();
3392                 if (ret) {
3393                         RTE_LOG(ERR, EAL,
3394                                 "Failed to stop device event monitor.\n");
3395                         return;
3396                 }
3397
3398                 ret = rte_dev_event_callback_unregister(NULL,
3399                         dev_event_callback, NULL);
3400                 if (ret < 0) {
3401                         RTE_LOG(ERR, EAL,
3402                                 "Failed to unregister device event callback.\n");
3403                         return;
3404                 }
3405
3406                 ret = rte_dev_hotplug_handle_disable();
3407                 if (ret) {
3408                         RTE_LOG(ERR, EAL,
3409                                 "Failed to disable hotplug handling.\n");
3410                         return;
3411                 }
3412         }
3413         for (i = 0; i < RTE_DIM(mempools); i++) {
3414                 if (mempools[i])
3415                         mempool_free_mp(mempools[i]);
3416         }
3417         free(xstats_display);
3418
3419         printf("\nBye...\n");
3420 }
3421
3422 typedef void (*cmd_func_t)(void);
3423 struct pmd_test_command {
3424         const char *cmd_name;
3425         cmd_func_t cmd_func;
3426 };
3427
3428 /* Check the link status of all ports for up to 9s, then print the final status. */
3429 static void
3430 check_all_ports_link_status(uint32_t port_mask)
3431 {
3432 #define CHECK_INTERVAL 100 /* 100ms */
3433 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3434         portid_t portid;
3435         uint8_t count, all_ports_up, print_flag = 0;
3436         struct rte_eth_link link;
3437         int ret;
3438         char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3439
3440         printf("Checking link statuses...\n");
3441         fflush(stdout);
3442         for (count = 0; count <= MAX_CHECK_TIME; count++) {
3443                 all_ports_up = 1;
3444                 RTE_ETH_FOREACH_DEV(portid) {
3445                         if ((port_mask & (1 << portid)) == 0)
3446                                 continue;
3447                         memset(&link, 0, sizeof(link));
3448                         ret = rte_eth_link_get_nowait(portid, &link);
3449                         if (ret < 0) {
3450                                 all_ports_up = 0;
3451                                 if (print_flag == 1)
3452                                         fprintf(stderr,
3453                                                 "Port %u link get failed: %s\n",
3454                                                 portid, rte_strerror(-ret));
3455                                 continue;
3456                         }
3457                         /* print link status if flag set */
3458                         if (print_flag == 1) {
3459                                 rte_eth_link_to_str(link_status,
3460                                         sizeof(link_status), &link);
3461                                 printf("Port %d %s\n", portid, link_status);
3462                                 continue;
3463                         }
3464                         /* clear all_ports_up flag if any link down */
3465                         if (link.link_status == ETH_LINK_DOWN) {
3466                                 all_ports_up = 0;
3467                                 break;
3468                         }
3469                 }
3470                 /* after finally printing all link status, get out */
3471                 if (print_flag == 1)
3472                         break;
3473
3474                 if (all_ports_up == 0) {
3475                         fflush(stdout);
3476                         rte_delay_ms(CHECK_INTERVAL);
3477                 }
3478
3479                 /* set the print_flag if all ports up or timeout */
3480                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3481                         print_flag = 1;
3482                 }
3483
3484                 if (lsc_interrupt)
3485                         break;
3486         }
3487 }
3488
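/*
 * Deferred handler for device removal: pause forwarding if the port was
 * in use, stop and close the port, detach the underlying device, then
 * resume forwarding if it had been stopped here.
 */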
3489 static void
3490 rmv_port_callback(void *arg)
3491 {
3492         int need_to_start = 0;
3493         int org_no_link_check = no_link_check;
3494         portid_t port_id = (intptr_t)arg;
3495         struct rte_eth_dev_info dev_info;
3496         int ret;
3497
3498         RTE_ETH_VALID_PORTID_OR_RET(port_id);
3499
3500         if (!test_done && port_is_forwarding(port_id)) {
3501                 need_to_start = 1;
3502                 stop_packet_forwarding();
3503         }
3504         no_link_check = 1;
3505         stop_port(port_id);
3506         no_link_check = org_no_link_check;
3507
3508         ret = eth_dev_info_get_print_err(port_id, &dev_info);
3509         if (ret != 0)
3510                 TESTPMD_LOG(ERR,
3511                         "Failed to get device info for port %d, not detaching\n",
3512                         port_id);
3513         else {
3514                 struct rte_device *device = dev_info.device;
3515                 close_port(port_id);
3516                 detach_device(device); /* might be already removed or have more ports */
3517         }
3518         if (need_to_start)
3519                 start_packet_forwarding(0);
3520 }
3521
3522 /* This function is used by the interrupt thread */
3523 static int
3524 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3525                   void *ret_param)
3526 {
3527         RTE_SET_USED(param);
3528         RTE_SET_USED(ret_param);
3529
3530         if (type >= RTE_ETH_EVENT_MAX) {
3531                 fprintf(stderr,
3532                         "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3533                         port_id, __func__, type);
3534                 fflush(stderr);
3535         } else if (event_print_mask & (UINT32_C(1) << type)) {
3536                 printf("\nPort %" PRIu16 ": %s event\n", port_id,
3537                         eth_event_desc[type]);
3538                 fflush(stdout);
3539         }
3540
3541         switch (type) {
3542         case RTE_ETH_EVENT_NEW:
3543                 ports[port_id].need_setup = 1;
3544                 ports[port_id].port_status = RTE_PORT_HANDLING;
3545                 break;
3546         case RTE_ETH_EVENT_INTR_RMV:
3547                 if (port_id_is_invalid(port_id, DISABLED_WARN))
3548                         break;
3549                 if (rte_eal_alarm_set(100000,
3550                                 rmv_port_callback, (void *)(intptr_t)port_id))
3551                         fprintf(stderr,
3552                                 "Could not set up deferred device removal\n");
3553                 break;
3554         case RTE_ETH_EVENT_DESTROY:
3555                 ports[port_id].port_status = RTE_PORT_CLOSED;
3556                 printf("Port %u is closed\n", port_id);
3557                 break;
3558         default:
3559                 break;
3560         }
3561         return 0;
3562 }
3563
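/* Register eth_event_callback for every ethdev event type on all ports. */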
3564 static int
3565 register_eth_event_callback(void)
3566 {
3567         int ret;
3568         enum rte_eth_event_type event;
3569
3570         for (event = RTE_ETH_EVENT_UNKNOWN;
3571                         event < RTE_ETH_EVENT_MAX; event++) {
3572                 ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3573                                 event,
3574                                 eth_event_callback,
3575                                 NULL);
3576                 if (ret != 0) {
3577                         TESTPMD_LOG(ERR, "Failed to register callback for "
3578                                         "%s event\n", eth_event_desc[event]);
3579                         return -1;
3580                 }
3581         }
3582
3583         return 0;
3584 }
3585
3586 /* This function is used by the interrupt thread */
3587 static void
3588 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3589                              __rte_unused void *arg)
3590 {
3591         uint16_t port_id;
3592         int ret;
3593
3594         if (type >= RTE_DEV_EVENT_MAX) {
3595                 fprintf(stderr, "%s called upon invalid event %d\n",
3596                         __func__, type);
3597                 fflush(stderr);
3598         }
3599
3600         switch (type) {
3601         case RTE_DEV_EVENT_REMOVE:
3602                 RTE_LOG(DEBUG, EAL, "The device %s has been removed!\n",
3603                         device_name);
3604                 ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3605                 if (ret) {
3606                         RTE_LOG(ERR, EAL, "Cannot get port by device %s\n",
3607                                 device_name);
3608                         return;
3609                 }
3610                 /*
3611                  * Because the user's callback is invoked from the EAL
3612                  * interrupt callback, the interrupt callback must finish
3613                  * before it can be unregistered when detaching the device.
3614                  * So return from this callback quickly and use a deferred
3615                  * removal to detach the device instead. This is a
3616                  * workaround; once device detaching is moved into the EAL,
3617                  * the deferred removal can be dropped.
3618                  */
3619                 if (rte_eal_alarm_set(100000,
3620                                 rmv_port_callback, (void *)(intptr_t)port_id))
3621                         RTE_LOG(ERR, EAL,
3622                                 "Could not set up deferred device removal\n");
3623                 break;
3624         case RTE_DEV_EVENT_ADD:
3625                 RTE_LOG(ERR, EAL, "The device %s has been added!\n",
3626                         device_name);
3627                 /* TODO: After finish kernel driver binding,
3628                  * begin to attach port.
3629                  */
3630                 break;
3631         default:
3632                 break;
3633         }
3634 }
3635
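/*
 * Initialize per-queue Rx/Tx configuration from the device defaults,
 * then apply any thresholds passed on the command line.
 */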
3636 static void
3637 rxtx_port_config(struct rte_port *port)
3638 {
3639         uint16_t qid;
3640         uint64_t offloads;
3641
3642         for (qid = 0; qid < nb_rxq; qid++) {
3643                 offloads = port->rx_conf[qid].offloads;
3644                 port->rx_conf[qid] = port->dev_info.default_rxconf;
3645                 if (offloads != 0)
3646                         port->rx_conf[qid].offloads = offloads;
3647
3648                 /* Check if any Rx parameters have been passed */
3649                 if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3650                         port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3651
3652                 if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3653                         port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3654
3655                 if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3656                         port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3657
3658                 if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3659                         port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3660
3661                 if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3662                         port->rx_conf[qid].rx_drop_en = rx_drop_en;
3663
3664                 port->nb_rx_desc[qid] = nb_rxd;
3665         }
3666
3667         for (qid = 0; qid < nb_txq; qid++) {
3668                 offloads = port->tx_conf[qid].offloads;
3669                 port->tx_conf[qid] = port->dev_info.default_txconf;
3670                 if (offloads != 0)
3671                         port->tx_conf[qid].offloads = offloads;
3672
3673                 /* Check if any Tx parameters have been passed */
3674                 if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3675                         port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3676
3677                 if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3678                         port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3679
3680                 if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3681                         port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3682
3683                 if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3684                         port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3685
3686                 if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3687                         port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3688
3689                 port->nb_tx_desc[qid] = nb_txd;
3690         }
3691 }
3692
3693 /*
3694  * Helper function to keep max_rx_pktlen and the JUMBO_FRAME offload
3695  * consistent; the MTU is aligned as well.
3696  *
3697  * port->dev_info must be set before calling this function.
3698  *
3699  * If 'max_rx_pktlen' is zero, it is set to the current device value,
3700  * "MTU + ETH_OVERHEAD". This is useful to update the flags but not the MTU.
3701  *
3702  * Return 0 on success, negative on error.
3703  */
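/*
 * For example, assuming an Ethernet overhead of 18 bytes (14-byte header
 * plus 4-byte CRC, no VLAN): max_rx_pktlen = 9018 gives an MTU of 9000
 * and turns the JUMBO_FRAME offload on, while max_rx_pktlen = 1518 gives
 * the standard RTE_ETHER_MTU of 1500 and turns it off.
 */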
3704 int
3705 update_jumbo_frame_offload(portid_t portid, uint32_t max_rx_pktlen)
3706 {
3707         struct rte_port *port = &ports[portid];
3708         uint32_t eth_overhead;
3709         uint64_t rx_offloads;
3710         uint16_t mtu, new_mtu;
3711         bool on;
3712
3713         eth_overhead = get_eth_overhead(&port->dev_info);
3714
3715         if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3716                 fprintf(stderr, "Failed to get MTU for port %u\n", portid);
3717                 return -1;
3718         }
3719
3720         if (max_rx_pktlen == 0)
3721                 max_rx_pktlen = mtu + eth_overhead;
3722
3723         rx_offloads = port->dev_conf.rxmode.offloads;
3724         new_mtu = max_rx_pktlen - eth_overhead;
3725
3726         if (new_mtu <= RTE_ETHER_MTU) {
3727                 rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3728                 on = false;
3729         } else {
3730                 if ((port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3731                         fprintf(stderr,
3732                                 "Frame size (%u) is not supported by port %u\n",
3733                                 max_rx_pktlen, portid);
3734                         return -1;
3735                 }
3736                 rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3737                 on = true;
3738         }
3739
3740         if (rx_offloads != port->dev_conf.rxmode.offloads) {
3741                 uint16_t qid;
3742
3743                 port->dev_conf.rxmode.offloads = rx_offloads;
3744
3745                 /* Apply JUMBO_FRAME offload configuration to Rx queue(s) */
3746                 for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) {
3747                         if (on)
3748                                 port->rx_conf[qid].offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3749                         else
3750                                 port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3751                 }
3752         }
3753
3754         if (mtu == new_mtu)
3755                 return 0;
3756
3757         if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3758                 fprintf(stderr,
3759                         "Failed to set MTU to %u for port %u\n",
3760                         new_mtu, portid);
3761                 return -1;
3762         }
3763
3764         port->dev_conf.rxmode.mtu = new_mtu;
3765
3766         return 0;
3767 }
3768
3769 void
3770 init_port_config(void)
3771 {
3772         portid_t pid;
3773         struct rte_port *port;
3774         int ret, i;
3775
3776         RTE_ETH_FOREACH_DEV(pid) {
3777                 port = &ports[pid];
3778                 port->dev_conf.fdir_conf = fdir_conf;
3779
3780                 ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3781                 if (ret != 0)
3782                         return;
3783
3784                 if (nb_rxq > 1) {
3785                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3786                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3787                                 rss_hf & port->dev_info.flow_type_rss_offloads;
3788                 } else {
3789                         port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3790                         port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3791                 }
3792
3793                 if (port->dcb_flag == 0) {
3794                         if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3795                                 port->dev_conf.rxmode.mq_mode =
3796                                         (enum rte_eth_rx_mq_mode)
3797                                                 (rx_mq_mode & ETH_MQ_RX_RSS);
3798                         } else {
3799                                 port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3800                                 port->dev_conf.rxmode.offloads &=
3801                                                 ~DEV_RX_OFFLOAD_RSS_HASH;
3802
3803                                 for (i = 0;
3804                                      i < port->dev_info.nb_rx_queues;
3805                                      i++)
3806                                         port->rx_conf[i].offloads &=
3807                                                 ~DEV_RX_OFFLOAD_RSS_HASH;
3808                         }
3809                 }
3810
3811                 rxtx_port_config(port);
3812
3813                 ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3814                 if (ret != 0)
3815                         return;
3816
3817 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3818                 rte_pmd_ixgbe_bypass_init(pid);
3819 #endif
3820
3821                 if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3822                         port->dev_conf.intr_conf.lsc = 1;
3823                 if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3824                         port->dev_conf.intr_conf.rmv = 1;
3825         }
3826 }
3827
3828 void set_port_slave_flag(portid_t slave_pid)
3829 {
3830         struct rte_port *port;
3831
3832         port = &ports[slave_pid];
3833         port->slave_flag = 1;
3834 }
3835
3836 void clear_port_slave_flag(portid_t slave_pid)
3837 {
3838         struct rte_port *port;
3839
3840         port = &ports[slave_pid];
3841         port->slave_flag = 0;
3842 }
3843
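/* A port is a bonding slave if the PMD reports it or testpmd flagged it. */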
3844 uint8_t port_is_bonding_slave(portid_t slave_pid)
3845 {
3846         struct rte_port *port;
3847         struct rte_eth_dev_info dev_info;
3848         int ret;
3849
3850         port = &ports[slave_pid];
3851         ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3852         if (ret != 0) {
3853                 TESTPMD_LOG(ERR,
3854                         "Failed to get device info for port id %d, "
3855                         "cannot determine if the port is a bonded slave\n",
3856                         slave_pid);
3857                 return 0;
3858         }
3859         if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3860                 return 1;
3861         return 0;
3862 }
3863
3864 const uint16_t vlan_tags[] = {
3865                 0,  1,  2,  3,  4,  5,  6,  7,
3866                 8,  9, 10, 11,  12, 13, 14, 15,
3867                 16, 17, 18, 19, 20, 21, 22, 23,
3868                 24, 25, 26, 27, 28, 29, 30, 31
3869 };
3870
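/*
 * Build an rte_eth_conf for DCB: either VMDq+DCB (DCB_VT_ENABLED), mapping
 * the VLAN tags above to queue pools, or plain DCB combined with RSS.
 */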
3871 static int
3872 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3873                  enum dcb_mode_enable dcb_mode,
3874                  enum rte_eth_nb_tcs num_tcs,
3875                  uint8_t pfc_en)
3876 {
3877         uint8_t i;
3878         int32_t rc;
3879         struct rte_eth_rss_conf rss_conf;
3880
3881         /*
3882          * Build up the correct configuration for DCB+VT based on the VLAN
3883          * tags array given above and the number of traffic classes available.
3884          */
3885         if (dcb_mode == DCB_VT_ENABLED) {
3886                 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3887                                 &eth_conf->rx_adv_conf.vmdq_dcb_conf;
3888                 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3889                                 &eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3890
3891                 /* VMDQ+DCB RX and TX configurations */
3892                 vmdq_rx_conf->enable_default_pool = 0;
3893                 vmdq_rx_conf->default_pool = 0;
3894                 vmdq_rx_conf->nb_queue_pools =
3895                         (num_tcs == ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3896                 vmdq_tx_conf->nb_queue_pools =
3897                         (num_tcs == ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3898
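                /* Map each VLAN tag to a single pool, round-robin. */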
3899                 vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3900                 for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3901                         vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3902                         vmdq_rx_conf->pool_map[i].pools =
3903                                 1 << (i % vmdq_rx_conf->nb_queue_pools);
3904                 }
3905                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3906                         vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3907                         vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3908                 }
3909
3910                 /* set DCB mode of RX and TX of multiple queues */
3911                 eth_conf->rxmode.mq_mode =
3912                                 (enum rte_eth_rx_mq_mode)
3913                                         (rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3914                 eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3915         } else {
3916                 struct rte_eth_dcb_rx_conf *rx_conf =
3917                                 &eth_conf->rx_adv_conf.dcb_rx_conf;
3918                 struct rte_eth_dcb_tx_conf *tx_conf =
3919                                 &eth_conf->tx_adv_conf.dcb_tx_conf;
3920
3921                 memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3922
3923                 rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3924                 if (rc != 0)
3925                         return rc;
3926
3927                 rx_conf->nb_tcs = num_tcs;
3928                 tx_conf->nb_tcs = num_tcs;
3929
3930                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3931                         rx_conf->dcb_tc[i] = i % num_tcs;
3932                         tx_conf->dcb_tc[i] = i % num_tcs;
3933                 }
3934
3935                 eth_conf->rxmode.mq_mode =
3936                                 (enum rte_eth_rx_mq_mode)
3937                                         (rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3938                 eth_conf->rx_adv_conf.rss_conf = rss_conf;
3939                 eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3940         }
3941
3942         if (pfc_en)
3943                 eth_conf->dcb_capability_en =
3944                                 ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3945         else
3946                 eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3947
3948         return 0;
3949 }
3950
3951 int
3952 init_port_dcb_config(portid_t pid,
3953                      enum dcb_mode_enable dcb_mode,
3954                      enum rte_eth_nb_tcs num_tcs,
3955                      uint8_t pfc_en)
3956 {
3957         struct rte_eth_conf port_conf;
3958         struct rte_port *rte_port;
3959         int retval;
3960         uint16_t i;
3961
3962         if (num_procs > 1) {
3963                 fprintf(stderr, "The multi-process feature doesn't support DCB.\n");
3964                 return -ENOTSUP;
3965         }
3966         rte_port = &ports[pid];
3967
3968         /* retain the original device configuration. */
3969         memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3970
3971         /* Set configuration of DCB in VT mode and DCB in non-VT mode. */
3972         retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3973         if (retval < 0)
3974                 return retval;
3975         port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3976
3977         /* Re-configure the device. */
3978         retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3979         if (retval < 0)
3980                 return retval;
3981
3982         retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3983         if (retval != 0)
3984                 return retval;
3985
3986         /* If dev_info.vmdq_pool_base is greater than 0,
3987          * the queue IDs of the VMDq pools start after the PF queues.
3988          */
3989         if (dcb_mode == DCB_VT_ENABLED &&
3990             rte_port->dev_info.vmdq_pool_base > 0) {
3991                 fprintf(stderr,
3992                         "VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3993                         pid);
3994                 return -1;
3995         }
3996
3997         /* Assume the ports in testpmd have the same DCB capability
3998          * and the same number of Rx and Tx queues in DCB mode.
3999          */
4000         if (dcb_mode == DCB_VT_ENABLED) {
4001                 if (rte_port->dev_info.max_vfs > 0) {
4002                         nb_rxq = rte_port->dev_info.nb_rx_queues;
4003                         nb_txq = rte_port->dev_info.nb_tx_queues;
4004                 } else {
4005                         nb_rxq = rte_port->dev_info.max_rx_queues;
4006                         nb_txq = rte_port->dev_info.max_tx_queues;
4007                 }
4008         } else {
4009                 /* If VT is disabled, use all PF queues. */
4010                 if (rte_port->dev_info.vmdq_pool_base == 0) {
4011                         nb_rxq = rte_port->dev_info.max_rx_queues;
4012                         nb_txq = rte_port->dev_info.max_tx_queues;
4013                 } else {
4014                         nb_rxq = (queueid_t)num_tcs;
4015                         nb_txq = (queueid_t)num_tcs;
4017                 }
4018         }
4019         rx_free_thresh = 64;
4020
4021         memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
4022
4023         rxtx_port_config(rte_port);
4024         /* VLAN filter */
4025         rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
4026         for (i = 0; i < RTE_DIM(vlan_tags); i++)
4027                 rx_vft_set(pid, vlan_tags[i], 1);
4028
4029         retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4030         if (retval != 0)
4031                 return retval;
4032
4033         rte_port->dcb_flag = 1;
4034
4035         /* Enter DCB configuration status */
4036         dcb_config = 1;
4037
4038         return 0;
4039 }
4040
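/* Allocate the global ports array and initialize the per-port NUMA config. */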
4041 static void
4042 init_port(void)
4043 {
4044         int i;
4045
4046         /* Configuration of Ethernet ports. */
4047         ports = rte_zmalloc("testpmd: ports",
4048                             sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4049                             RTE_CACHE_LINE_SIZE);
4050         if (ports == NULL) {
4051                 rte_exit(EXIT_FAILURE,
4052                                 "rte_zmalloc(%d struct rte_port) failed\n",
4053                                 RTE_MAX_ETHPORTS);
4054         }
4055         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4056                 ports[i].xstats_info.allocated = false;
4057         for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4058                 LIST_INIT(&ports[i].flow_tunnel_list);
4059         /* Initialize ports NUMA structures */
4060         memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4061         memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4062         memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4063 }
4064
4065 static void
4066 force_quit(void)
4067 {
4068         pmd_test_exit();
4069         prompt_exit();
4070 }
4071
4072 static void
4073 print_stats(void)
4074 {
4075         uint8_t i;
4076         const char clr[] = { 27, '[', '2', 'J', '\0' };
4077         const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4078
4079         /* Clear screen and move to top left */
4080         printf("%s%s", clr, top_left);
4081
4082         printf("\nPort statistics ====================================");
4083         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4084                 nic_stats_display(fwd_ports_ids[i]);
4085
4086         fflush(stdout);
4087 }
4088
4089 static void
4090 signal_handler(int signum)
4091 {
4092         if (signum == SIGINT || signum == SIGTERM) {
4093                 fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4094                         signum);
4095 #ifdef RTE_LIB_PDUMP
4096                 /* uninitialize packet capture framework */
4097                 rte_pdump_uninit();
4098 #endif
4099 #ifdef RTE_LIB_LATENCYSTATS
4100                 if (latencystats_enabled != 0)
4101                         rte_latencystats_uninit();
4102 #endif
4103                 force_quit();
4104                 /* Set flag to indicate the force termination. */
4105                 f_quit = 1;
4106                 /* exit with the expected status */
4107 #ifndef RTE_EXEC_ENV_WINDOWS
4108                 signal(signum, SIG_DFL);
4109                 kill(getpid(), signum);
4110 #endif
4111         }
4112 }
4113
4114 int
4115 main(int argc, char** argv)
4116 {
4117         int diag;
4118         portid_t port_id;
4119         uint16_t count;
4120         int ret;
4121
4122         signal(SIGINT, signal_handler);
4123         signal(SIGTERM, signal_handler);
4124
4125         testpmd_logtype = rte_log_register("testpmd");
4126         if (testpmd_logtype < 0)
4127                 rte_exit(EXIT_FAILURE, "Cannot register log type");
4128         rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4129
4130         diag = rte_eal_init(argc, argv);
4131         if (diag < 0)
4132                 rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4133                          rte_strerror(rte_errno));
4134
4135         ret = register_eth_event_callback();
4136         if (ret != 0)
4137                 rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
4138
4139 #ifdef RTE_LIB_PDUMP
4140         /* initialize packet capture framework */
4141         rte_pdump_init();
4142 #endif
4143
4144         count = 0;
4145         RTE_ETH_FOREACH_DEV(port_id) {
4146                 ports_ids[count] = port_id;
4147                 count++;
4148         }
4149         nb_ports = (portid_t) count;
4150         if (nb_ports == 0)
4151                 TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4152
4153         /* allocate port structures, and init them */
4154         init_port();
4155
4156         set_def_fwd_config();
4157         if (nb_lcores == 0)
4158                 rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4159                          "Check the core mask argument\n");
4160
4161         /* Bitrate/latency stats disabled by default */
4162 #ifdef RTE_LIB_BITRATESTATS
4163         bitrate_enabled = 0;
4164 #endif
4165 #ifdef RTE_LIB_LATENCYSTATS
4166         latencystats_enabled = 0;
4167 #endif
4168
4169         /* on FreeBSD, mlockall() is disabled by default */
4170 #ifdef RTE_EXEC_ENV_FREEBSD
4171         do_mlockall = 0;
4172 #else
4173         do_mlockall = 1;
4174 #endif
4175
4176         argc -= diag;
4177         argv += diag;
4178         if (argc > 1)
4179                 launch_args_parse(argc, argv);
4180
4181 #ifndef RTE_EXEC_ENV_WINDOWS
4182         if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4183                 TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4184                         strerror(errno));
4185         }
4186 #endif
4187
4188         if (tx_first && interactive)
4189                 rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4190                                 "interactive mode.\n");
4191
4192         if (tx_first && lsc_interrupt) {
4193                 fprintf(stderr,
4194                         "Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4195                 lsc_interrupt = 0;
4196         }
4197
4198         if (!nb_rxq && !nb_txq)
4199                 fprintf(stderr,
4200                         "Warning: Either rx or tx queues should be non-zero\n");
4201
4202         if (nb_rxq > 1 && nb_rxq > nb_txq)
4203                 fprintf(stderr,
4204                         "Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4205                         nb_rxq, nb_txq);
4206
4207         init_config();
4208
4209         if (hot_plug) {
4210                 ret = rte_dev_hotplug_handle_enable();
4211                 if (ret) {
4212                         RTE_LOG(ERR, EAL,
4213                                 "Failed to enable hotplug handling.\n");
4214                         return -1;
4215                 }
4216
4217                 ret = rte_dev_event_monitor_start();
4218                 if (ret) {
4219                         RTE_LOG(ERR, EAL,
4220                                 "Failed to start device event monitoring.\n");
4221                         return -1;
4222                 }
4223
4224                 ret = rte_dev_event_callback_register(NULL,
4225                         dev_event_callback, NULL);
4226                 if (ret) {
4227                         RTE_LOG(ERR, EAL,
4228                                 "Failed to register device event callback\n");
4229                         return -1;
4230                 }
4231         }
4232
4233         if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4234                 rte_exit(EXIT_FAILURE, "Start ports failed\n");
4235
4236         /* set all ports to promiscuous mode by default */
4237         RTE_ETH_FOREACH_DEV(port_id) {
4238                 ret = rte_eth_promiscuous_enable(port_id);
4239                 if (ret != 0)
4240                         fprintf(stderr,
4241                                 "Error during enabling promiscuous mode for port %u: %s - ignore\n",
4242                                 port_id, rte_strerror(-ret));
4243         }
4244
4245         /* Init metrics library */
4246         rte_metrics_init(rte_socket_id());
4247
4248 #ifdef RTE_LIB_LATENCYSTATS
4249         if (latencystats_enabled != 0) {
4250                 int ret = rte_latencystats_init(1, NULL);
4251                 if (ret)
4252                         fprintf(stderr,
4253                                 "Warning: latencystats init() returned error %d\n",
4254                                 ret);
4255                 fprintf(stderr, "Latencystats running on lcore %d\n",
4256                         latencystats_lcore_id);
4257         }
4258 #endif
4259
4260         /* Setup bitrate stats */
4261 #ifdef RTE_LIB_BITRATESTATS
4262         if (bitrate_enabled != 0) {
4263                 bitrate_data = rte_stats_bitrate_create();
4264                 if (bitrate_data == NULL)
4265                         rte_exit(EXIT_FAILURE,
4266                                 "Could not allocate bitrate data.\n");
4267                 rte_stats_bitrate_reg(bitrate_data);
4268         }
4269 #endif
4270
4271 #ifdef RTE_LIB_CMDLINE
4272         if (strlen(cmdline_filename) != 0)
4273                 cmdline_read_from_file(cmdline_filename);
4274
4275         if (interactive == 1) {
4276                 if (auto_start) {
4277                         printf("Start automatic packet forwarding\n");
4278                         start_packet_forwarding(0);
4279                 }
4280                 prompt();
4281                 pmd_test_exit();
4282         } else
4283 #endif
4284         {
4285                 char c;
4286                 int rc;
4287
4288                 f_quit = 0;
4289
4290                 printf("No interactive command line, starting packet forwarding\n");
4291                 start_packet_forwarding(tx_first);
4292                 if (stats_period != 0) {
4293                         uint64_t prev_time = 0, cur_time, diff_time = 0;
4294                         uint64_t timer_period;
4295
4296                         /* Convert to number of cycles */
4297                         timer_period = stats_period * rte_get_timer_hz();
4298
4299                         while (f_quit == 0) {
4300                                 cur_time = rte_get_timer_cycles();
4301                                 diff_time += cur_time - prev_time;
4302
4303                                 if (diff_time >= timer_period) {
4304                                         print_stats();
4305                                         /* Reset the timer */
4306                                         diff_time = 0;
4307                                 }
4308                                 /* Sleep to avoid unnecessary checks */
4309                                 prev_time = cur_time;
4310                                 rte_delay_us_sleep(US_PER_S);
4311                         }
4312                 }
4313
4314                 printf("Press enter to exit\n");
4315                 rc = read(0, &c, 1);
4316                 pmd_test_exit();
4317                 if (rc < 0)
4318                         return 1;
4319         }
4320
4321         ret = rte_eal_cleanup();
4322         if (ret != 0)
4323                 rte_exit(EXIT_FAILURE,
4324                          "EAL cleanup failed: %s\n", strerror(-ret));
4325
4326         return EXIT_SUCCESS;
4327 }