4 * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <getopt.h>

#include <sys/queue.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <linux/if_tun.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_string_fns.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_kni.h>
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1

/* Max size of a single packet */
#define MAX_PACKET_SZ 2048

/* Number of bytes needed for each mbuf */
#define MBUF_SZ \
	(MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)

/* Number of mbufs in mempool that is created */
#define NB_MBUF (8192 * 16)

/* How many packets to attempt to read from NIC in one go */
#define PKT_BURST_SZ 32

/* How many objects (mbufs) to keep in per-lcore mempool cache */
#define MEMPOOL_CACHE_SZ PKT_BURST_SZ

/* Number of RX ring descriptors */
#define NB_RXD 128

/* Number of TX ring descriptors */
#define NB_TXD 512

/* Total octets in ethernet header */
#define KNI_ENET_HEADER_SIZE 14

/* Total octets in the FCS */
#define KNI_ENET_FCS_SIZE 4

#define KNI_US_PER_SECOND 1000000
#define KNI_SECOND_PER_DAY 86400
/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
118 /* RX ring configuration */
119 static const struct rte_eth_rxconf rx_conf = {
121 .pthresh = 8, /* Ring prefetch threshold */
122 .hthresh = 8, /* Ring host threshold */
123 .wthresh = 4, /* Ring writeback threshold */
125 .rx_free_thresh = 0, /* Immediately free RX descriptors */
/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
133 /* TX ring configuration */
134 static const struct rte_eth_txconf tx_conf = {
136 .pthresh = 36, /* Ring prefetch threshold */
137 .hthresh = 0, /* Ring host threshold */
138 .wthresh = 0, /* Ring writeback threshold */
140 .tx_free_thresh = 0, /* Use PMD default values */
141 .tx_rs_thresh = 0, /* Use PMD default values */
144 /* Options for configuring ethernet port */
145 static struct rte_eth_conf port_conf = {
147 .header_split = 0, /* Header Split disabled */
148 .hw_ip_checksum = 0, /* IP checksum offload disabled */
149 .hw_vlan_filter = 0, /* VLAN filtering disabled */
150 .jumbo_frame = 0, /* Jumbo Frame Support disabled */
151 .hw_strip_crc = 0, /* CRC stripped by hardware */
154 .mq_mode = ETH_MQ_TX_NONE,
158 /* Mempool for mbufs */
159 static struct rte_mempool * pktmbuf_pool = NULL;
161 /* Mask of enabled ports */
162 static uint32_t ports_mask = 0;
164 /* Mask of cores that read from NIC and write to tap */
165 static uint32_t input_cores_mask = 0;
167 /* Mask of cores that read from tap and write to NIC */
168 static uint32_t output_cores_mask = 0;
/* Structure type for recording kni interface specific stats */
struct kni_interface_stats {
	/* number of pkts received from NIC, and sent to KNI */
	uint64_t rx_packets;

	/* number of pkts received from NIC, but failed to send to KNI */
	uint64_t rx_dropped;

	/* number of pkts received from KNI, and sent to NIC */
	uint64_t tx_packets;

	/* number of pkts received from KNI, but failed to send to NIC */
	uint64_t tx_dropped;
};
/* Structure type for recording port specific information */
struct kni_port_info_t {
	/* lcore id for ingress */
	unsigned lcore_id_ingress;

	/* lcore id for egress */
	unsigned lcore_id_egress;

	/* pointer to kni interface; NULL when the port is not enabled */
	struct rte_kni *kni;
};
197 /* kni port specific information array*/
198 static struct kni_port_info_t kni_port_info[RTE_MAX_ETHPORTS];
200 /* kni device statistics array */
201 static struct kni_interface_stats kni_stats[RTE_MAX_ETHPORTS];
203 /* Get the pointer to kni interface */
204 static struct rte_kni * kni_lcore_to_kni(unsigned lcore_id);
206 static int kni_change_mtu(uint8_t port_id, unsigned new_mtu);
207 static int kni_config_network_interface(uint8_t port_id, uint8_t if_up);
209 static struct rte_kni_ops kni_ops = {
210 .change_mtu = kni_change_mtu,
211 .config_network_if = kni_config_network_interface,
214 static rte_atomic32_t kni_stop = RTE_ATOMIC32_INIT(0);
216 /* Print out statistics on packets handled */
222 printf("\n**KNI example application statistics**\n"
223 "====== ============== ============ ============ ============ ============\n"
224 " Port Lcore(RX/TX) rx_packets rx_dropped tx_packets tx_dropped\n"
225 "------ -------------- ------------ ------------ ------------ ------------\n");
226 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
227 if (kni_port_info[i].kni == NULL)
230 printf("%7d %10u/%2u %13"PRIu64" %13"PRIu64" %13"PRIu64" "
232 kni_port_info[i].lcore_id_ingress,
233 kni_port_info[i].lcore_id_egress,
234 kni_stats[i].rx_packets,
235 kni_stats[i].rx_dropped,
236 kni_stats[i].tx_packets,
237 kni_stats[i].tx_dropped);
239 printf("====== ============== ============ ============ ============ ============\n");
242 /* Custom handling of signals to handle stats and kni processing */
244 signal_handler(int signum)
246 /* When we receive a USR1 signal, print stats */
247 if (signum == SIGUSR1) {
251 /* When we receive a USR2 signal, reset stats */
252 if (signum == SIGUSR2) {
253 memset(&kni_stats, 0, sizeof(kni_stats));
254 printf("\n**Statistics have been reset**\n");
258 /* When we receive a RTMIN signal, stop kni processing */
259 if (signum == SIGRTMIN) {
260 printf("SIGRTMIN is received, and the KNI processing is "
262 rte_atomic32_inc(&kni_stop);
268 kni_burst_free_mbufs(struct rte_mbuf **pkts, unsigned num)
275 for (i = 0; i < num; i++) {
276 rte_pktmbuf_free(pkts[i]);
282 * Interface to burst rx and enqueue mbufs into rx_q
285 kni_ingress(struct rte_kni *kni)
287 uint8_t port_id = rte_kni_get_port_id(kni);
289 struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
291 if (kni == NULL || port_id >= RTE_MAX_ETHPORTS)
294 /* Burst rx from eth */
295 nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
296 if (nb_rx > PKT_BURST_SZ) {
297 RTE_LOG(ERR, APP, "Error receiving from eth\n");
301 /* Burst tx to kni */
302 num = rte_kni_tx_burst(kni, pkts_burst, nb_rx);
303 kni_stats[port_id].rx_packets += num;
305 rte_kni_handle_request(kni);
306 if (unlikely(num < nb_rx)) {
307 /* Free mbufs not tx to kni interface */
308 kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
309 kni_stats[port_id].rx_dropped += nb_rx - num;
314 * Interface to dequeue mbufs from tx_q and burst tx
317 kni_egress(struct rte_kni *kni)
319 uint8_t port_id = rte_kni_get_port_id(kni);;
321 struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
323 if (kni == NULL || port_id >= RTE_MAX_ETHPORTS)
326 /* Burst rx from kni */
327 num = rte_kni_rx_burst(kni, pkts_burst, PKT_BURST_SZ);
328 if (num > PKT_BURST_SZ) {
329 RTE_LOG(ERR, APP, "Error receiving from KNI\n");
333 /* Burst tx to eth */
334 nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
335 kni_stats[port_id].tx_packets += nb_tx;
337 if (unlikely(nb_tx < num)) {
338 /* Free mbufs not tx to NIC */
339 kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
340 kni_stats[port_id].tx_dropped += num - nb_tx;
344 /* Main processing loop */
346 main_loop(__rte_unused void *arg)
349 const unsigned lcore_id = rte_lcore_id();
350 struct rte_kni *kni = kni_lcore_to_kni(lcore_id);
353 pid = rte_kni_get_port_id(kni);
354 if (pid >= RTE_MAX_ETHPORTS)
355 rte_exit(EXIT_FAILURE, "Failure: port id >= %d\n",
358 if (kni_port_info[pid].lcore_id_ingress == lcore_id) {
359 /* Running on lcores for input packets */
360 RTE_LOG(INFO, APP, "Lcore %u is reading from "
361 "port %d\n", lcore_id, pid);
366 int32_t flag = rte_atomic32_read(&kni_stop);
372 } else if (kni_port_info[pid].lcore_id_egress == lcore_id) {
373 /* Running on lcores for output packets */
374 RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
380 int32_t flag = rte_atomic32_read(&kni_stop);
389 /* fallthrough to here if we don't have any work */
390 RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id);
395 /* Display usage instructions */
397 print_usage(const char *prgname)
399 RTE_LOG(INFO, APP, "\nUsage: %s [EAL options] -- -p PORTMASK "
400 "-i IN_CORES -o OUT_CORES\n"
401 " -p PORTMASK: hex bitmask of ports to use\n"
402 " -i IN_CORES: hex bitmask of cores which read "
404 " -o OUT_CORES: hex bitmask of cores which write "
/* Convert hex string to unsigned number. 0 is returned if error occurs */
static uint32_t
parse_unsigned(const char *portmask)
{
	char *end = NULL;
	unsigned long num;

	errno = 0;
	num = strtoul(portmask, &end, 16);
	/* Reject empty input and trailing garbage */
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;
	/* Reject out-of-range values instead of silently truncating to
	 * 32 bits, honoring the documented "0 on error" contract. */
	if (errno == ERANGE || num > UINT32_MAX)
		return 0;

	return (uint32_t)num;
}
424 kni_setup_port_affinities(uint8_t nb_port)
427 uint32_t in_lcore, out_lcore;
428 uint8_t rx_port = 0, tx_port = 0;
431 if (nb_port > RTE_MAX_ETHPORTS) {
432 RTE_LOG(ERR, APP, "The number of ports exceeds the maximum "
433 "number of 0x%x\n", RTE_MAX_ETHPORTS);
437 RTE_LCORE_FOREACH(i) {
438 in_lcore = input_cores_mask & (1 << i);
439 out_lcore = output_cores_mask & (1 << i);
441 /* Check if it is in input lcore or output lcore mask */
442 if (in_lcore == 0 && out_lcore == 0)
445 /* Check if it is in both input lcore and output lcore mask */
446 if (in_lcore != 0 && out_lcore != 0) {
447 RTE_LOG(ERR, APP, "Lcore 0x%x can not be used in both "
448 "input lcore and output lcore mask\n", i);
452 /* Check if the lcore is enabled or not */
453 if (rte_lcore_is_enabled(i) == 0) {
454 RTE_LOG(ERR, APP, "Lcore 0x%x is not enabled\n", i);
459 /* It is for packet receiving */
460 while ((rx_port < nb_port) &&
461 ((ports_mask & (1 << rx_port)) == 0))
464 if (rx_port >= nb_port) {
465 RTE_LOG(ERR, APP, "There is no enough ports "
466 "for ingress lcores\n");
469 kni_port_info[rx_port].lcore_id_ingress = i;
472 /* It is for packet transmitting */
473 while ((tx_port < nb_port) &&
474 ((ports_mask & (1 << tx_port)) == 0))
477 if (tx_port >= nb_port) {
478 RTE_LOG(ERR, APP, "There is no enough ports "
479 "for engree lcores\n");
482 kni_port_info[tx_port].lcore_id_egress = i;
487 /* Display all the port/lcore affinity */
488 for (pid = 0; pid < nb_port; pid++) {
489 RTE_LOG(INFO, APP, "Port%d, ingress lcore id: %u, "
490 "egress lcore id: %u\n", pid,
491 kni_port_info[pid].lcore_id_ingress,
492 kni_port_info[pid].lcore_id_egress);
498 static struct rte_kni *
499 kni_lcore_to_kni(unsigned lcore_id)
502 struct kni_port_info_t *p = kni_port_info;
504 for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
505 if (p[pid].kni != NULL && (p[pid].lcore_id_ingress == lcore_id
506 || p[pid].lcore_id_egress == lcore_id))
513 /* Parse the arguments given in the command line of the application */
515 parse_args(int argc, char **argv)
518 const char *prgname = argv[0];
520 /* Disable printing messages within getopt() */
523 /* Parse command line */
524 while ((opt = getopt(argc, argv, "i:o:p:")) != EOF) {
527 input_cores_mask = parse_unsigned(optarg);
530 output_cores_mask = parse_unsigned(optarg);
533 ports_mask = parse_unsigned(optarg);
536 print_usage(prgname);
537 rte_exit(EXIT_FAILURE, "Invalid option specified");
541 /* Check that options were parsed ok */
542 if (input_cores_mask == 0) {
543 print_usage(prgname);
544 rte_exit(EXIT_FAILURE, "IN_CORES not specified correctly");
546 if (output_cores_mask == 0) {
547 print_usage(prgname);
548 rte_exit(EXIT_FAILURE, "OUT_CORES not specified correctly");
550 if (ports_mask == 0) {
551 print_usage(prgname);
552 rte_exit(EXIT_FAILURE, "PORTMASK not specified correctly");
556 /* Initialise a single port on an Ethernet device */
558 init_port(uint8_t port)
562 /* Initialise device and RX/TX queues */
563 RTE_LOG(INFO, APP, "Initialising port %u ...\n", (unsigned)port);
565 ret = rte_eth_dev_configure(port, 1, 1, &port_conf);
567 rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)",
568 (unsigned)port, ret);
570 ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, rte_eth_dev_socket_id(port),
571 &rx_conf, pktmbuf_pool);
573 rte_exit(EXIT_FAILURE, "Could not setup up RX queue for "
574 "port%u (%d)", (unsigned)port, ret);
576 ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, rte_eth_dev_socket_id(port),
579 rte_exit(EXIT_FAILURE, "Could not setup up TX queue for "
580 "port%u (%d)", (unsigned)port, ret);
582 ret = rte_eth_dev_start(port);
584 rte_exit(EXIT_FAILURE, "Could not start port%u (%d)",
585 (unsigned)port, ret);
587 rte_eth_promiscuous_enable(port);
590 /* Check the link status of all ports in up to 9s, and print them finally */
592 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
594 #define CHECK_INTERVAL 100 /* 100ms */
595 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
596 uint8_t portid, count, all_ports_up, print_flag = 0;
597 struct rte_eth_link link;
599 printf("\nChecking link status");
601 for (count = 0; count <= MAX_CHECK_TIME; count++) {
603 for (portid = 0; portid < port_num; portid++) {
604 if ((port_mask & (1 << portid)) == 0)
606 memset(&link, 0, sizeof(link));
607 rte_eth_link_get_nowait(portid, &link);
608 /* print link status if flag set */
609 if (print_flag == 1) {
610 if (link.link_status)
611 printf("Port %d Link Up - speed %u "
612 "Mbps - %s\n", (uint8_t)portid,
613 (unsigned)link.link_speed,
614 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
615 ("full-duplex") : ("half-duplex\n"));
617 printf("Port %d Link Down\n",
621 /* clear all_ports_up flag if any link down */
622 if (link.link_status == 0) {
627 /* after finally printing all link status, get out */
631 if (all_ports_up == 0) {
634 rte_delay_ms(CHECK_INTERVAL);
637 /* set the print_flag if all ports up or timeout */
638 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
645 /* Callback for request of changing MTU */
647 kni_change_mtu(uint8_t port_id, unsigned new_mtu)
650 struct rte_eth_conf conf;
652 if (port_id >= rte_eth_dev_count()) {
653 RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
657 RTE_LOG(INFO, APP, "Change MTU of port %d to %u\n", port_id, new_mtu);
659 /* Stop specific port */
660 rte_eth_dev_stop(port_id);
662 memcpy(&conf, &port_conf, sizeof(conf));
664 if (new_mtu > ETHER_MAX_LEN)
665 conf.rxmode.jumbo_frame = 1;
667 conf.rxmode.jumbo_frame = 0;
669 /* mtu + length of header + length of FCS = max pkt length */
670 conf.rxmode.max_rx_pkt_len = new_mtu + KNI_ENET_HEADER_SIZE +
672 ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
674 RTE_LOG(ERR, APP, "Fail to reconfigure port %d\n", port_id);
678 /* Restart specific port */
679 ret = rte_eth_dev_start(port_id);
681 RTE_LOG(ERR, APP, "Fail to restart port %d\n", port_id);
688 /* Callback for request of configuring network interface up/down */
690 kni_config_network_interface(uint8_t port_id, uint8_t if_up)
694 if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
695 RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
699 RTE_LOG(INFO, APP, "Configure network interface of %d %s\n",
700 port_id, if_up ? "up" : "down");
702 if (if_up != 0) { /* Configure network interface up */
703 rte_eth_dev_stop(port_id);
704 ret = rte_eth_dev_start(port_id);
705 } else /* Configure network interface down */
706 rte_eth_dev_stop(port_id);
709 RTE_LOG(ERR, APP, "Failed to start port %d\n", port_id);
714 /* Initialise ports/queues etc. and start main loop on each core */
716 main(int argc, char** argv)
719 unsigned i, cfg_ports = 0;
720 uint8_t nb_sys_ports, port;
722 /* Associate signal_hanlder function with USR signals */
723 signal(SIGUSR1, signal_handler);
724 signal(SIGUSR2, signal_handler);
725 signal(SIGRTMIN, signal_handler);
728 ret = rte_eal_init(argc, argv);
730 rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)", ret);
734 /* Parse application arguments (after the EAL ones) */
735 parse_args(argc, argv);
737 /* Create the mbuf pool */
738 pktmbuf_pool = rte_mempool_create("mbuf_pool", NB_MBUF, MBUF_SZ,
740 sizeof(struct rte_pktmbuf_pool_private),
741 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
743 if (pktmbuf_pool == NULL) {
744 rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool");
748 /* Initialise PMD driver(s) */
749 ret = rte_pmd_init_all();
751 rte_exit(EXIT_FAILURE, "Could not initialise PMD (%d)", ret);
753 /* Scan PCI bus for recognised devices */
754 ret = rte_eal_pci_probe();
756 rte_exit(EXIT_FAILURE, "Could not probe PCI (%d)", ret);
758 /* Get number of ports found in scan */
759 nb_sys_ports = rte_eth_dev_count();
760 if (nb_sys_ports == 0)
761 rte_exit(EXIT_FAILURE, "No supported Ethernet devices found - "
762 "check that CONFIG_RTE_LIBRTE_IGB_PMD=y and/or "
763 "CONFIG_RTE_LIBRTE_IXGBE_PMD=y in the config file");
764 /* Find the number of configured ports in the port mask */
765 for (i = 0; i < sizeof(ports_mask) * 8; i++)
766 cfg_ports += !! (ports_mask & (1 << i));
768 if (cfg_ports > nb_sys_ports)
769 rte_exit(EXIT_FAILURE, "Port mask requires more ports than "
772 if (kni_setup_port_affinities(nb_sys_ports) < 0)
773 rte_exit(EXIT_FAILURE, "Fail to setup port affinities\n");
775 /* Initialise each port */
776 for (port = 0; port < nb_sys_ports; port++) {
779 /* Skip ports that are not enabled */
780 if ((ports_mask & (1 << port)) == 0) {
785 if (port >= RTE_MAX_ETHPORTS)
786 rte_exit(EXIT_FAILURE, "Can not use more than "
787 "%d ports for kni\n", RTE_MAX_ETHPORTS);
789 kni = rte_kni_create(port, MAX_PACKET_SZ, pktmbuf_pool,
792 rte_exit(EXIT_FAILURE, "Fail to create kni dev "
793 "for port: %d\n", port);
794 kni_port_info[port].kni = kni;
796 check_all_ports_link_status(nb_sys_ports, ports_mask);
798 /* Launch per-lcore function on every lcore */
799 rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
800 RTE_LCORE_FOREACH_SLAVE(i) {
801 if (rte_eal_wait_lcore(i) < 0)
805 for (port = 0; port < nb_sys_ports; port++) {
806 struct rte_kni *kni = kni_port_info[port].kni;
809 rte_kni_release(kni);