4 * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <sys/queue.h>
45 #include <netinet/in.h>
47 #include <linux/if_tun.h>
49 #include <sys/ioctl.h>
53 #include <rte_common.h>
55 #include <rte_memory.h>
56 #include <rte_memcpy.h>
57 #include <rte_memzone.h>
58 #include <rte_tailq.h>
60 #include <rte_per_lcore.h>
61 #include <rte_launch.h>
62 #include <rte_atomic.h>
63 #include <rte_lcore.h>
64 #include <rte_branch_prediction.h>
65 #include <rte_interrupts.h>
67 #include <rte_debug.h>
68 #include <rte_ether.h>
69 #include <rte_ethdev.h>
72 #include <rte_mempool.h>
74 #include <rte_string_fns.h>
75 #include <rte_cycles.h>
76 #include <rte_malloc.h>
79 /* Log type behind the APP tag used in this file's RTE_LOG(..., APP, ...) calls; aliases RTE_LOGTYPE_USER1 */
80 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
82 /* Max size of a single packet, in bytes; also handed to rte_kni_create() in main() */
83 #define MAX_PACKET_SZ 2048
85 /* Number of bytes needed for each mbuf: packet data + struct rte_mbuf + headroom */
87 (MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
89 /* Number of mbufs in the mempool that main() creates */
90 #define NB_MBUF (8192 * 16)
92 /* How many packets to attempt to read from NIC (or KNI) in one go; sizes the pkts_burst[] arrays */
93 #define PKT_BURST_SZ 32
95 /* How many objects (mbufs) to keep in per-lcore mempool cache */
96 #define MEMPOOL_CACHE_SZ PKT_BURST_SZ
98 /* Number of RX ring descriptors */
101 /* Number of TX ring descriptors */
104 /* Total octets in ethernet header; added to the MTU in kni_change_mtu() */
105 #define KNI_ENET_HEADER_SIZE 14
107 /* Total octets in the FCS; added to the MTU in kni_change_mtu() */
108 #define KNI_ENET_FCS_SIZE 4
/* Time conversion constants. NOTE(review): no user of these two macros is visible here - confirm they are still needed */
110 #define KNI_US_PER_SECOND 1000000
111 #define KNI_SECOND_PER_DAY 86400
114 * RX and TX Prefetch, Host, and Write-back threshold values should be
115 * carefully set for optimal performance. Consult the network
116 * controller's datasheet and supporting DPDK documentation for guidance
117 * on how these parameters should be set.
119 /* RX ring configuration; passed to rte_eth_rx_queue_setup() by init_port() */
120 static const struct rte_eth_rxconf rx_conf = {
122 .pthresh = 8, /* Ring prefetch threshold */
123 .hthresh = 8, /* Ring host threshold */
124 .wthresh = 4, /* Ring writeback threshold */
126 .rx_free_thresh = 0, /* Immediately free RX descriptors */
130 * These default values are optimized for use with the Intel(R) 82599 10 GbE
131 * Controller and the DPDK ixgbe PMD. Consider using other values for other
132 * network controllers and/or network drivers.
134 /*
 * TX ring configuration.
 * NOTE(review): presumably the configuration handed to
 * rte_eth_tx_queue_setup() in init_port() - confirm against that call.
 */
135 static const struct rte_eth_txconf tx_conf = {
137 .pthresh = 36, /* Ring prefetch threshold */
138 .hthresh = 0, /* Ring host threshold */
139 .wthresh = 0, /* Ring writeback threshold */
141 .tx_free_thresh = 0, /* Use PMD default values */
142 .tx_rs_thresh = 0, /* Use PMD default values */
145 /*
 * Options for configuring ethernet port.
 * Used as-is by init_port() and as the template that kni_change_mtu()
 * copies before adjusting the jumbo-frame settings.
 */
146 static struct rte_eth_conf port_conf = {
148 .header_split = 0, /* Header Split disabled */
149 .hw_ip_checksum = 0, /* IP checksum offload disabled */
150 .hw_vlan_filter = 0, /* VLAN filtering disabled */
151 .jumbo_frame = 0, /* Jumbo Frame Support disabled */
152 .hw_strip_crc = 0, /* NOTE(review): flag is 0 like the "disabled" entries above, so hardware CRC stripping is presumably off; the earlier comment claimed "CRC stripped by hardware" - confirm */
155 .mq_mode = ETH_MQ_TX_NONE,
159 /* Mempool for mbufs; created in main() and shared by the RX queues and the KNI devices */
160 static struct rte_mempool * pktmbuf_pool = NULL;
162 /* Mask of enabled ports (bit N set = port N is used); filled in by parse_args() */
163 static uint32_t ports_mask = 0;
165 /* Mask of cores that read from NIC and write to the KNI interface; filled in by parse_args() */
166 static uint32_t input_cores_mask = 0;
168 /* Mask of cores that read from the KNI interface and write to NIC; filled in by parse_args() */
169 static uint32_t output_cores_mask = 0;
171 /* Structure type for recording kni interface specific stats; one entry per port in kni_stats[] */
172 struct kni_interface_stats {
173 /* rx_packets: number of pkts received from NIC, and sent to KNI (updated by kni_ingress) */
176 /* rx_dropped: number of pkts received from NIC, but failed to send to KNI (updated by kni_ingress) */
179 /* tx_packets: number of pkts received from KNI, and sent to NIC (updated by kni_egress) */
182 /* tx_dropped: number of pkts received from KNI, but failed to send to NIC (updated by kni_egress) */
186 /* Structure type for recording port specific information; one entry per port in kni_port_info[] */
187 struct kni_port_info_t {
188 /* lcore id for ingress (NIC -> KNI); assigned by kni_setup_port_affinities() */
189 unsigned lcore_id_ingress;
191 /* lcore id for egress (KNI -> NIC); assigned by kni_setup_port_affinities() */
192 unsigned lcore_id_egress;
194 /* pointer to kni interface; stays NULL until main() stores the rte_kni_create() result */
198 /* kni port specific information array, indexed by port id */
199 static struct kni_port_info_t kni_port_info[RTE_MAX_ETHPORTS];
201 /* kni device statistics array, indexed by port id */
202 static struct kni_interface_stats kni_stats[RTE_MAX_ETHPORTS];
204 /* Get the pointer to the kni interface served by the given lcore */
205 static struct rte_kni * kni_lcore_to_kni(unsigned lcore_id);
/* Callbacks installed in kni_ops below */
207 static int kni_change_mtu(uint8_t port_id, unsigned new_mtu);
208 static int kni_config_network_interface(uint8_t port_id, uint8_t if_up);
/* Callback table for KNI requests (MTU change, interface up/down) */
210 static struct rte_kni_ops kni_ops = {
211 .change_mtu = kni_change_mtu,
212 .config_network_if = kni_config_network_interface,
/* Stop flag: incremented by signal_handler() on SIGRTMIN, read by main_loop() */
215 static rte_atomic32_t kni_stop = RTE_ATOMIC32_INIT(0);
217 /*
 * Print out statistics on packets handled.
 *
 * Writes a text table to stdout: a header, then for the slots of
 * kni_port_info[] (each slot's kni pointer is tested against NULL) the
 * ingress/egress lcore ids and the four kni_stats[] counters, then a
 * closing rule.
 */
223 printf("\n**KNI example application statistics**\n"
224 "====== ============== ============ ============ ============ ============\n"
225 " Port Lcore(RX/TX) rx_packets rx_dropped tx_packets tx_dropped\n"
226 "------ -------------- ------------ ------------ ------------ ------------\n");
/* Walk every possible port slot, not just the configured ones */
227 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
228 if (kni_port_info[i].kni == NULL)
/* Counters are uint64-style values, hence the PRIu64 conversions */
231 printf("%7d %10u/%2u %13"PRIu64" %13"PRIu64" %13"PRIu64" "
233 kni_port_info[i].lcore_id_ingress,
234 kni_port_info[i].lcore_id_egress,
235 kni_stats[i].rx_packets,
236 kni_stats[i].rx_dropped,
237 kni_stats[i].tx_packets,
238 kni_stats[i].tx_dropped);
240 printf("====== ============== ============ ============ ============ ============\n");
243 /*
 * Custom handling of signals to handle stats and kni processing.
 *
 * Installed by main() for SIGUSR1, SIGUSR2 and SIGRTMIN; signum selects
 * the action taken.
 *
 * NOTE(review): this handler calls printf(), which POSIX does not list as
 * async-signal-safe, and clears kni_stats while worker lcores may be
 * updating it - acceptable for a demo, worth confirming otherwise.
 */
245 signal_handler(int signum)
247 /* When we receive a USR1 signal, print stats */
248 if (signum == SIGUSR1) {
252 /* When we receive a USR2 signal, reset stats (zeroes the whole kni_stats array) */
253 if (signum == SIGUSR2) {
254 memset(&kni_stats, 0, sizeof(kni_stats));
255 printf("\n**Statistics have been reset**\n");
259 /* When we receive a RTMIN signal, stop kni processing */
260 if (signum == SIGRTMIN) {
261 printf("SIGRTMIN is received, and the KNI processing is "
/* Raise the stop flag that main_loop() reads via rte_atomic32_read() */
263 rte_atomic32_inc(&kni_stop);
/*
 * Release a burst of mbufs.
 *
 * pkts: array of mbuf pointers to free
 * num:  number of entries in pkts
 *
 * Used by kni_ingress()/kni_egress() for the packets that could not be
 * forwarded.
 */
269 kni_burst_free_mbufs(struct rte_mbuf **pkts, unsigned num)
/* Hand each mbuf to rte_pktmbuf_free() */
276 for (i = 0; i < num; i++) {
277 rte_pktmbuf_free(pkts[i]);
283 * Interface to burst rx and enqueue mbufs into rx_q
286 kni_ingress(struct rte_kni *kni)
288 uint8_t port_id = rte_kni_get_port_id(kni);
290 struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
292 if (kni == NULL || port_id >= RTE_MAX_ETHPORTS)
295 /* Burst rx from eth */
296 nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
297 if (nb_rx > PKT_BURST_SZ) {
298 RTE_LOG(ERR, APP, "Error receiving from eth\n");
302 /* Burst tx to kni */
303 num = rte_kni_tx_burst(kni, pkts_burst, nb_rx);
304 kni_stats[port_id].rx_packets += num;
306 rte_kni_handle_request(kni);
307 if (unlikely(num < nb_rx)) {
308 /* Free mbufs not tx to kni interface */
309 kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
310 kni_stats[port_id].rx_dropped += nb_rx - num;
315 * Interface to dequeue mbufs from tx_q and burst tx
318 kni_egress(struct rte_kni *kni)
320 uint8_t port_id = rte_kni_get_port_id(kni);;
322 struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
324 if (kni == NULL || port_id >= RTE_MAX_ETHPORTS)
327 /* Burst rx from kni */
328 num = rte_kni_rx_burst(kni, pkts_burst, PKT_BURST_SZ);
329 if (num > PKT_BURST_SZ) {
330 RTE_LOG(ERR, APP, "Error receiving from KNI\n");
334 /* Burst tx to eth */
335 nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
336 kni_stats[port_id].tx_packets += nb_tx;
338 if (unlikely(nb_tx < num)) {
339 /* Free mbufs not tx to NIC */
340 kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
341 kni_stats[port_id].tx_dropped += num - nb_tx;
345 /*
 * Main processing loop, launched on every lcore by main() through
 * rte_eal_mp_remote_launch().
 *
 * Looks up the KNI interface served by the calling lcore, then compares
 * the lcore id with the ingress and egress lcore recorded for that port to
 * pick a role. Each role reads the kni_stop flag that signal_handler()
 * raises on SIGRTMIN.
 *
 * arg is unused.
 */
347 main_loop(__rte_unused void *arg)
350 const unsigned lcore_id = rte_lcore_id();
351 struct rte_kni *kni = kni_lcore_to_kni(lcore_id);
/* Map the KNI handle back to its port id and refuse ids outside the tables */
354 pid = rte_kni_get_port_id(kni);
355 if (pid >= RTE_MAX_ETHPORTS)
356 rte_exit(EXIT_FAILURE, "Failure: port id >= %d\n",
359 if (kni_port_info[pid].lcore_id_ingress == lcore_id) {
360 /* Running on lcores for input packets */
361 RTE_LOG(INFO, APP, "Lcore %u is reading from "
362 "port %d\n", lcore_id, pid);
/* Snapshot of the stop flag for the ingress role */
367 int32_t flag = rte_atomic32_read(&kni_stop);
373 } else if (kni_port_info[pid].lcore_id_egress == lcore_id) {
374 /* Running on lcores for output packets */
375 RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
/* Snapshot of the stop flag for the egress role */
381 int32_t flag = rte_atomic32_read(&kni_stop);
390 /* fallthrough to here if we don't have any work */
391 RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id);
396 /*
 * Display usage instructions through RTE_LOG at INFO level.
 *
 * prgname: program name substituted into the usage line
 *
 * Documents the three application options that parse_args() accepts:
 * -p (port mask), -i (input core mask) and -o (output core mask), all
 * given as hexadecimal bitmasks.
 */
398 print_usage(const char *prgname)
400 RTE_LOG(INFO, APP, "\nUsage: %s [EAL options] -- -p PORTMASK "
401 "-i IN_CORES -o OUT_CORES\n"
402 " -p PORTMASK: hex bitmask of ports to use\n"
403 " -i IN_CORES: hex bitmask of cores which read "
405 " -o OUT_CORES: hex bitmask of cores which write "
/*
 * Convert a hexadecimal string to an unsigned 32-bit number.
 *
 * portmask: NUL-terminated string of hex digits, optionally prefixed with
 *           "0x"/"0X" (as accepted by strtoul() in base 16)
 *
 * Returns the parsed value, or 0 if an error occurs: NULL or empty input,
 * trailing characters that are not hex digits, a minus sign, or a value
 * that does not fit in 32 bits. Callers treat 0 as "not specified
 * correctly", so a mask that is too wide is rejected instead of being
 * silently truncated.
 */
static uint32_t
parse_unsigned(const char *portmask)
{
	char *end = NULL;
	unsigned long num;

	if (portmask == NULL || portmask[0] == '\0')
		return 0;

	/* strtoul() would accept "-1" and return its negation; a mask is never negative */
	if (strchr(portmask, '-') != NULL)
		return 0;

	num = strtoul(portmask, &end, 16);

	/* No digits consumed, or garbage after the number */
	if (end == portmask || *end != '\0')
		return 0;

	/* Reject values that would lose bits in the 32-bit mask */
	if (num > UINT32_MAX)
		return 0;

	return (uint32_t)num;
}
425 kni_setup_port_affinities(uint8_t nb_port)
428 uint32_t in_lcore, out_lcore;
429 uint8_t rx_port = 0, tx_port = 0;
432 if (nb_port > RTE_MAX_ETHPORTS) {
433 RTE_LOG(ERR, APP, "The number of ports exceeds the maximum "
434 "number of 0x%x\n", RTE_MAX_ETHPORTS);
438 RTE_LCORE_FOREACH(i) {
439 in_lcore = input_cores_mask & (1 << i);
440 out_lcore = output_cores_mask & (1 << i);
442 /* Check if it is in input lcore or output lcore mask */
443 if (in_lcore == 0 && out_lcore == 0)
446 /* Check if it is in both input lcore and output lcore mask */
447 if (in_lcore != 0 && out_lcore != 0) {
448 RTE_LOG(ERR, APP, "Lcore 0x%x can not be used in both "
449 "input lcore and output lcore mask\n", i);
453 /* Check if the lcore is enabled or not */
454 if (rte_lcore_is_enabled(i) == 0) {
455 RTE_LOG(ERR, APP, "Lcore 0x%x is not enabled\n", i);
460 /* It is for packet receiving */
461 while ((rx_port < nb_port) &&
462 ((ports_mask & (1 << rx_port)) == 0))
465 if (rx_port >= nb_port) {
466 RTE_LOG(ERR, APP, "There is no enough ports "
467 "for ingress lcores\n");
470 kni_port_info[rx_port].lcore_id_ingress = i;
473 /* It is for packet transmitting */
474 while ((tx_port < nb_port) &&
475 ((ports_mask & (1 << tx_port)) == 0))
478 if (tx_port >= nb_port) {
479 RTE_LOG(ERR, APP, "There is no enough ports "
480 "for engree lcores\n");
483 kni_port_info[tx_port].lcore_id_egress = i;
488 /* Display all the port/lcore affinity */
489 for (pid = 0; pid < nb_port; pid++) {
490 RTE_LOG(INFO, APP, "Port%d, ingress lcore id: %u, "
491 "egress lcore id: %u\n", pid,
492 kni_port_info[pid].lcore_id_ingress,
493 kni_port_info[pid].lcore_id_egress);
/*
 * Get the pointer to the kni interface served by an lcore.
 *
 * lcore_id: lcore to look up
 *
 * Scans kni_port_info[] for a port that has a KNI interface and whose
 * ingress or egress lcore equals lcore_id.
 */
499 static struct rte_kni *
500 kni_lcore_to_kni(unsigned lcore_id)
503 struct kni_port_info_t *p = kni_port_info;
/* Slots without a KNI interface are ignored, so their zero-initialised lcore ids never match */
505 for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
506 if (p[pid].kni != NULL && (p[pid].lcore_id_ingress == lcore_id
507 || p[pid].lcore_id_egress == lcore_id))
514 /*
 * Parse the arguments given in the command line of the application.
 *
 * argc/argv: argument vector to parse; argv[0] is used as the program
 *            name in the usage text
 *
 * Fills input_cores_mask, output_cores_mask and ports_mask from the
 * option arguments (each parsed as a hex bitmask by parse_unsigned()).
 * Prints the usage text and terminates through rte_exit() when an option
 * is not recognised or when any of the three masks ends up as 0.
 */
516 parse_args(int argc, char **argv)
519 const char *prgname = argv[0];
521 /* Disable printing messages within getopt() */
524 /* Parse command line; the option string accepts -i, -o and -p, each with an argument */
525 while ((opt = getopt(argc, argv, "i:o:p:")) != EOF) {
528 input_cores_mask = parse_unsigned(optarg);
531 output_cores_mask = parse_unsigned(optarg);
534 ports_mask = parse_unsigned(optarg);
537 print_usage(prgname);
538 rte_exit(EXIT_FAILURE, "Invalid option specified");
542 /* Check that options were parsed ok: parse_unsigned() yields 0 on error, so 0 also means "missing" */
543 if (input_cores_mask == 0) {
544 print_usage(prgname);
545 rte_exit(EXIT_FAILURE, "IN_CORES not specified correctly");
547 if (output_cores_mask == 0) {
548 print_usage(prgname);
549 rte_exit(EXIT_FAILURE, "OUT_CORES not specified correctly");
551 if (ports_mask == 0) {
552 print_usage(prgname);
553 rte_exit(EXIT_FAILURE, "PORTMASK not specified correctly");
557 /*
 * Initialise a single port on an Ethernet device.
 *
 * port: id of the port to bring up
 *
 * Configures the device with one RX and one TX queue using port_conf,
 * sets up queue 0 in each direction on the port's socket, starts the
 * device and enables promiscuous mode. Each step stores its result in
 * ret; the rte_exit() calls report the failing step together with ret.
 */
559 init_port(uint8_t port)
563 /* Initialise device and RX/TX queues */
564 RTE_LOG(INFO, APP, "Initialising port %u ...\n", (unsigned)port);
/* One RX queue and one TX queue */
566 ret = rte_eth_dev_configure(port, 1, 1, &port_conf);
568 rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)",
569 (unsigned)port, ret);
/* RX queue 0 draws its mbufs from the shared pktmbuf_pool */
571 ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, rte_eth_dev_socket_id(port),
572 &rx_conf, pktmbuf_pool);
574 rte_exit(EXIT_FAILURE, "Could not setup up RX queue for "
575 "port%u (%d)", (unsigned)port, ret);
577 ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, rte_eth_dev_socket_id(port),
580 rte_exit(EXIT_FAILURE, "Could not setup up TX queue for "
581 "port%u (%d)", (unsigned)port, ret);
583 ret = rte_eth_dev_start(port);
585 rte_exit(EXIT_FAILURE, "Could not start port%u (%d)",
586 (unsigned)port, ret);
/* Promiscuous mode is enabled on the port after it is started */
588 rte_eth_promiscuous_enable(port);
591 /* Check the link status of all ports in up to 9s, and print them finally */
593 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
595 #define CHECK_INTERVAL 100 /* 100ms */
596 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
597 uint8_t portid, count, all_ports_up, print_flag = 0;
598 struct rte_eth_link link;
600 printf("\nChecking link status");
602 for (count = 0; count <= MAX_CHECK_TIME; count++) {
604 for (portid = 0; portid < port_num; portid++) {
605 if ((port_mask & (1 << portid)) == 0)
607 memset(&link, 0, sizeof(link));
608 rte_eth_link_get_nowait(portid, &link);
609 /* print link status if flag set */
610 if (print_flag == 1) {
611 if (link.link_status)
612 printf("Port %d Link Up - speed %u "
613 "Mbps - %s\n", (uint8_t)portid,
614 (unsigned)link.link_speed,
615 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
616 ("full-duplex") : ("half-duplex\n"));
618 printf("Port %d Link Down\n",
622 /* clear all_ports_up flag if any link down */
623 if (link.link_status == 0) {
628 /* after finally printing all link status, get out */
632 if (all_ports_up == 0) {
635 rte_delay_ms(CHECK_INTERVAL);
638 /* set the print_flag if all ports up or timeout */
639 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
646 /* Callback for request of changing MTU */
648 kni_change_mtu(uint8_t port_id, unsigned new_mtu)
651 struct rte_eth_conf conf;
653 if (port_id >= rte_eth_dev_count()) {
654 RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
658 RTE_LOG(INFO, APP, "Change MTU of port %d to %u\n", port_id, new_mtu);
660 /* Stop specific port */
661 rte_eth_dev_stop(port_id);
663 memcpy(&conf, &port_conf, sizeof(conf));
665 if (new_mtu > ETHER_MAX_LEN)
666 conf.rxmode.jumbo_frame = 1;
668 conf.rxmode.jumbo_frame = 0;
670 /* mtu + length of header + length of FCS = max pkt length */
671 conf.rxmode.max_rx_pkt_len = new_mtu + KNI_ENET_HEADER_SIZE +
673 ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
675 RTE_LOG(ERR, APP, "Fail to reconfigure port %d\n", port_id);
679 /* Restart specific port */
680 ret = rte_eth_dev_start(port_id);
682 RTE_LOG(ERR, APP, "Fail to restart port %d\n", port_id);
689 /*
 * Callback for request of configuring network interface up/down.
 *
 * port_id: port backing the network interface
 * if_up:   non-zero to bring the interface up, zero to take it down
 *
 * Installed as kni_ops.config_network_if. "Up" restarts the port (stop
 * followed by start); "down" only stops it.
 */
691 kni_config_network_interface(uint8_t port_id, uint8_t if_up)
/* The id must name a detected device and also fit the per-port tables */
695 if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
696 RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
700 RTE_LOG(INFO, APP, "Configure network interface of %d %s\n",
701 port_id, if_up ? "up" : "down");
703 if (if_up != 0) { /* Configure network interface up */
/* Stop first so the start below runs from a stopped port */
704 rte_eth_dev_stop(port_id);
705 ret = rte_eth_dev_start(port_id);
706 } else /* Configure network interface down */
707 rte_eth_dev_stop(port_id);
/* Only the start call assigns ret, so this message concerns the "up" path */
710 RTE_LOG(ERR, APP, "Failed to start port %d\n", port_id);
715 /*
 * Initialise ports/queues etc. and start main loop on each core.
 *
 * Sequence: install the signal handlers, initialise the EAL, parse the
 * application options, create the mbuf pool, initialise the PMDs and
 * probe the PCI bus, validate the port mask against the ports found, bind
 * lcores to ports, create a KNI device per enabled port, wait for the
 * links, run main_loop() on every lcore, and finally release the KNI
 * devices.
 *
 * Most failures end the process through rte_exit(EXIT_FAILURE, ...).
 */
717 main(int argc, char** argv)
720 unsigned i, cfg_ports = 0;
721 uint8_t nb_sys_ports, port;
723 /* Associate signal_handler function with the USR1, USR2 and RTMIN signals */
724 signal(SIGUSR1, signal_handler);
725 signal(SIGUSR2, signal_handler);
726 signal(SIGRTMIN, signal_handler);
729 ret = rte_eal_init(argc, argv);
731 rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)", ret);
735 /* Parse application arguments (after the EAL ones) */
736 parse_args(argc, argv);
738 /* Create the mbuf pool: NB_MBUF elements of MBUF_SZ bytes, initialised as packet mbufs */
739 pktmbuf_pool = rte_mempool_create("mbuf_pool", NB_MBUF, MBUF_SZ,
741 sizeof(struct rte_pktmbuf_pool_private),
742 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
744 if (pktmbuf_pool == NULL) {
745 rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool");
749 /* Initialise PMD driver(s) */
750 ret = rte_pmd_init_all();
752 rte_exit(EXIT_FAILURE, "Could not initialise PMD (%d)", ret);
754 /* Scan PCI bus for recognised devices */
755 ret = rte_eal_pci_probe();
757 rte_exit(EXIT_FAILURE, "Could not probe PCI (%d)", ret);
759 /* Get number of ports found in scan */
760 nb_sys_ports = rte_eth_dev_count();
761 if (nb_sys_ports == 0)
762 rte_exit(EXIT_FAILURE, "No supported Ethernet devices found - "
763 "check that CONFIG_RTE_LIBRTE_IGB_PMD=y and/or "
764 "CONFIG_RTE_LIBRTE_IXGBE_PMD=y in the config file");
765 /*
 * Find the number of configured ports in the port mask (population count
 * over all 32 bits).
 * NOTE(review): "1 << i" is a signed int shift; at i == 31 it overflows
 * int, which the C standard leaves undefined - consider 1U << i.
 */
766 for (i = 0; i < sizeof(ports_mask) * 8; i++)
767 cfg_ports += !! (ports_mask & (1 << i));
769 if (cfg_ports > nb_sys_ports)
770 rte_exit(EXIT_FAILURE, "Port mask requires more ports than "
773 if (kni_setup_port_affinities(nb_sys_ports) < 0)
774 rte_exit(EXIT_FAILURE, "Fail to setup port affinities\n");
776 /* Initialise each port */
777 for (port = 0; port < nb_sys_ports; port++) {
780 /* Skip ports that are not enabled (same signed-shift caveat as above for port >= 31) */
781 if ((ports_mask & (1 << port)) == 0) {
786 if (port >= RTE_MAX_ETHPORTS)
787 rte_exit(EXIT_FAILURE, "Can not use more than "
788 "%d ports for kni\n", RTE_MAX_ETHPORTS);
/* Create the KNI device for this port, backed by the shared mbuf pool */
790 kni = rte_kni_create(port, MAX_PACKET_SZ, pktmbuf_pool,
793 rte_exit(EXIT_FAILURE, "Fail to create kni dev "
794 "for port: %d\n", port);
/* Publish the handle; kni_lcore_to_kni() and print_stats() key off it */
795 kni_port_info[port].kni = kni;
797 check_all_ports_link_status(nb_sys_ports, ports_mask);
799 /* Launch per-lcore function on every lcore, the master lcore included (CALL_MASTER) */
800 rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
/* Wait for the slave lcores to finish */
801 RTE_LCORE_FOREACH_SLAVE(i) {
802 if (rte_eal_wait_lcore(i) < 0)
/* Tear down the KNI devices */
806 for (port = 0; port < nb_sys_ports; port++) {
807 struct rte_kni *kni = kni_port_info[port].kni;
810 rte_kni_release(kni);