/*
 * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/queue.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <linux/if_tun.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_string_fns.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_kni.h>
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1

/* Max size of a single packet */
#define MAX_PACKET_SZ 2048

/* Number of bytes needed for each mbuf: packet data + mbuf header + headroom */
#define MBUF_SZ \
	(MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)

/* Number of mbufs in mempool that is created */
#define NB_MBUF (8192 * 16)

/* How many packets to attempt to read from NIC in one go */
#define PKT_BURST_SZ 32

/* How many objects (mbufs) to keep in per-lcore mempool cache */
#define MEMPOOL_CACHE_SZ PKT_BURST_SZ
/* Number of RX ring descriptors */
/* NOTE(review): value lost in garbled source; 128 is the DPDK example default -- confirm */
#define NB_RXD 128

/* Number of TX ring descriptors */
/* NOTE(review): value lost in garbled source; 512 is the DPDK example default -- confirm */
#define NB_TXD 512

/* Total octets in ethernet header */
#define KNI_ENET_HEADER_SIZE 14

/* Total octets in the FCS (frame check sequence) */
#define KNI_ENET_FCS_SIZE 4
111 * RX and TX Prefetch, Host, and Write-back threshold values should be
112 * carefully set for optimal performance. Consult the network
113 * controller's datasheet and supporting DPDK documentation for guidance
114 * on how these parameters should be set.
116 /* RX ring configuration */
117 static const struct rte_eth_rxconf rx_conf = {
119 .pthresh = 8, /* Ring prefetch threshold */
120 .hthresh = 8, /* Ring host threshold */
121 .wthresh = 4, /* Ring writeback threshold */
123 .rx_free_thresh = 0, /* Immediately free RX descriptors */
127 * These default values are optimized for use with the Intel(R) 82599 10 GbE
128 * Controller and the DPDK ixgbe PMD. Consider using other values for other
129 * network controllers and/or network drivers.
131 /* TX ring configuration */
132 static const struct rte_eth_txconf tx_conf = {
134 .pthresh = 36, /* Ring prefetch threshold */
135 .hthresh = 0, /* Ring host threshold */
136 .wthresh = 0, /* Ring writeback threshold */
138 .tx_free_thresh = 0, /* Use PMD default values */
139 .tx_rs_thresh = 0, /* Use PMD default values */
142 /* Options for configuring ethernet port */
143 static struct rte_eth_conf port_conf = {
145 .header_split = 0, /* Header Split disabled */
146 .hw_ip_checksum = 0, /* IP checksum offload disabled */
147 .hw_vlan_filter = 0, /* VLAN filtering disabled */
148 .jumbo_frame = 0, /* Jumbo Frame Support disabled */
149 .hw_strip_crc = 0, /* CRC stripped by hardware */
152 .mq_mode = ETH_MQ_TX_NONE,
156 /* Mempool for mbufs */
157 static struct rte_mempool * pktmbuf_pool = NULL;
159 /* Mask of enabled ports */
160 static uint32_t ports_mask = 0;
162 /* Mask of cores that read from NIC and write to tap */
163 static uint32_t input_cores_mask = 0;
165 /* Mask of cores that read from tap and write to NIC */
166 static uint32_t output_cores_mask = 0;
/* Structure type for recording kni interface specific stats */
struct kni_interface_stats {
	/* number of pkts received from NIC, and sent to KNI */
	uint64_t rx_packets;

	/* number of pkts received from NIC, but failed to send to KNI */
	uint64_t rx_dropped;

	/* number of pkts received from KNI, and sent to NIC */
	uint64_t tx_packets;

	/* number of pkts received from KNI, but failed to send to NIC */
	uint64_t tx_dropped;
};
183 /* Structure type for recording port specific information */
184 struct kni_port_info_t {
185 /* lcore id for ingress */
186 unsigned lcore_id_ingress;
188 /* lcore id for egress */
189 unsigned lcore_id_egress;
191 /* pointer to kni interface */
195 /* kni port specific information array*/
196 static struct kni_port_info_t kni_port_info[RTE_MAX_ETHPORTS];
198 /* kni device statistics array */
199 static struct kni_interface_stats kni_stats[RTE_MAX_ETHPORTS];
201 /* Get the pointer to kni interface */
202 static struct rte_kni * kni_lcore_to_kni(unsigned lcore_id);
204 static int kni_change_mtu(uint8_t port_id, unsigned new_mtu);
205 static int kni_config_network_interface(uint8_t port_id, uint8_t if_up);
207 static struct rte_kni_ops kni_ops = {
208 .change_mtu = kni_change_mtu,
209 .config_network_if = kni_config_network_interface,
212 /* Print out statistics on packets handled */
218 printf("\n**KNI example application statistics**\n"
219 "====== ============== ============ ============ ============ ============\n"
220 " Port Lcore(RX/TX) rx_packets rx_dropped tx_packets tx_dropped\n"
221 "------ -------------- ------------ ------------ ------------ ------------\n");
222 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
223 if (kni_port_info[i].kni == NULL)
226 printf("%7d %10u/%2u %13"PRIu64" %13"PRIu64" %13"PRIu64" "
228 kni_port_info[i].lcore_id_ingress,
229 kni_port_info[i].lcore_id_egress,
230 kni_stats[i].rx_packets,
231 kni_stats[i].rx_dropped,
232 kni_stats[i].tx_packets,
233 kni_stats[i].tx_dropped);
235 printf("====== ============== ============ ============ ============ ============\n");
238 /* Custom handling of signals to handle stats */
240 signal_handler(int signum)
242 /* When we receive a USR1 signal, print stats */
243 if (signum == SIGUSR1) {
247 /* When we receive a USR2 signal, reset stats */
248 if (signum == SIGUSR2) {
249 memset(&kni_stats, 0, sizeof(kni_stats));
250 printf("\n**Statistics have been reset**\n");
256 kni_burst_free_mbufs(struct rte_mbuf **pkts, unsigned num)
263 for (i = 0; i < num; i++) {
264 rte_pktmbuf_free(pkts[i]);
270 * Interface to burst rx and enqueue mbufs into rx_q
273 kni_ingress(struct rte_kni *kni)
275 uint8_t port_id = rte_kni_get_port_id(kni);
277 struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
279 if (kni == NULL || port_id >= RTE_MAX_ETHPORTS)
282 /* Burst rx from eth */
283 nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
284 if (nb_rx > PKT_BURST_SZ) {
285 RTE_LOG(ERR, APP, "Error receiving from eth\n");
289 /* Burst tx to kni */
290 num = rte_kni_tx_burst(kni, pkts_burst, nb_rx);
291 kni_stats[port_id].rx_packets += num;
293 if (unlikely(num < nb_rx)) {
294 /* Free mbufs not tx to kni interface */
295 kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
296 kni_stats[port_id].rx_dropped += nb_rx - num;
301 * Interface to dequeue mbufs from tx_q and burst tx
304 kni_egress(struct rte_kni *kni)
306 uint8_t port_id = rte_kni_get_port_id(kni);;
308 struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
310 if (kni == NULL || port_id >= RTE_MAX_ETHPORTS)
313 /* Burst rx from kni */
314 num = rte_kni_rx_burst(kni, pkts_burst, PKT_BURST_SZ);
315 if (num > PKT_BURST_SZ) {
316 RTE_LOG(ERR, APP, "Error receiving from KNI\n");
320 /* Burst tx to eth */
321 nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
322 kni_stats[port_id].tx_packets += nb_tx;
324 if (unlikely(nb_tx < num)) {
325 /* Free mbufs not tx to NIC */
326 kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
327 kni_stats[port_id].tx_dropped += num - nb_tx;
331 /* Main processing loop */
332 static __attribute__((noreturn)) int
333 main_loop(__rte_unused void *arg)
336 const unsigned lcore_id = rte_lcore_id();
337 struct rte_kni *kni = kni_lcore_to_kni(lcore_id);
340 RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id);
342 ; /* loop doing nothing */
344 pid = rte_kni_get_port_id(kni);
345 if (pid >= RTE_MAX_ETHPORTS)
346 rte_exit(EXIT_FAILURE, "Failure: port id >= %d\n",
349 if (kni_port_info[pid].lcore_id_ingress == lcore_id) {
350 /* Running on lcores for input packets */
351 RTE_LOG(INFO, APP, "Lcore %u is reading from "
352 "port %d\n", lcore_id, pid);
358 } else if (kni_port_info[pid].lcore_id_egress == lcore_id) {
359 /* Running on lcores for output packets */
360 RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
368 RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n",
371 ; /* loop doing nothing */
376 /* Display usage instructions */
378 print_usage(const char *prgname)
380 RTE_LOG(INFO, APP, "\nUsage: %s [EAL options] -- -p PORTMASK "
381 "-i IN_CORES -o OUT_CORES\n"
382 " -p PORTMASK: hex bitmask of ports to use\n"
383 " -i IN_CORES: hex bitmask of cores which read "
385 " -o OUT_CORES: hex bitmask of cores which write to NIC\n",
/*
 * Convert a hex string to an unsigned number.
 * 0 is returned if an error occurs (empty string or trailing garbage).
 */
static uint32_t
parse_unsigned(const char *portmask)
{
	char *end = NULL;
	unsigned long num;

	num = strtoul(portmask, &end, 16);
	/* Reject empty input and input with non-hex trailing characters */
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	return (uint32_t)num;
}
404 kni_setup_port_affinities(uint8_t nb_port)
407 uint32_t in_lcore, out_lcore;
408 uint8_t rx_port = 0, tx_port = 0;
411 if (nb_port > RTE_MAX_ETHPORTS) {
412 RTE_LOG(ERR, APP, "The number of ports exceeds the maximum "
413 "number of 0x%x\n", RTE_MAX_ETHPORTS);
417 RTE_LCORE_FOREACH(i) {
418 in_lcore = input_cores_mask & (1 << i);
419 out_lcore = output_cores_mask & (1 << i);
421 /* Check if it is in input lcore or output lcore mask */
422 if (in_lcore == 0 && out_lcore == 0)
425 /* Check if it is in both input lcore and output lcore mask */
426 if (in_lcore != 0 && out_lcore != 0) {
427 RTE_LOG(ERR, APP, "Lcore 0x%x can not be used in both "
428 "input lcore and output lcore mask\n", i);
432 /* Check if the lcore is enabled or not */
433 if (rte_lcore_is_enabled(i) == 0) {
434 RTE_LOG(ERR, APP, "Lcore 0x%x is not enabled\n", i);
439 /* It is be for packet receiving */
440 while ((rx_port < nb_port) &&
441 ((ports_mask & (1 << rx_port)) == 0))
444 if (rx_port >= nb_port) {
445 RTE_LOG(ERR, APP, "There is no enough ports "
446 "for ingress lcores\n");
449 kni_port_info[rx_port].lcore_id_ingress = i;
452 /* It is for packet transmitting */
453 while ((tx_port < nb_port) &&
454 ((ports_mask & (1 << tx_port)) == 0))
457 if (tx_port >= nb_port) {
458 RTE_LOG(ERR, APP, "There is no enough ports "
459 "for engree lcores\n");
462 kni_port_info[tx_port].lcore_id_egress = i;
467 /* Display all the port/lcore affinity */
468 for (pid = 0; pid < nb_port; pid++) {
469 RTE_LOG(INFO, APP, "Port%d, ingress lcore id: %u, "
470 "egress lcore id: %u\n", pid,
471 kni_port_info[pid].lcore_id_ingress,
472 kni_port_info[pid].lcore_id_egress);
478 static struct rte_kni *
479 kni_lcore_to_kni(unsigned lcore_id)
482 struct kni_port_info_t *p = kni_port_info;
484 for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
485 if (p[pid].kni != NULL && (p[pid].lcore_id_ingress == lcore_id
486 || p[pid].lcore_id_egress == lcore_id))
493 /* Parse the arguments given in the command line of the application */
495 parse_args(int argc, char **argv)
498 const char *prgname = argv[0];
500 /* Disable printing messages within getopt() */
503 /* Parse command line */
504 while ((opt = getopt(argc, argv, "i:o:p:")) != EOF) {
507 input_cores_mask = parse_unsigned(optarg);
510 output_cores_mask = parse_unsigned(optarg);
513 ports_mask = parse_unsigned(optarg);
516 print_usage(prgname);
517 rte_exit(EXIT_FAILURE, "Invalid option specified");
521 /* Check that options were parsed ok */
522 if (input_cores_mask == 0) {
523 print_usage(prgname);
524 rte_exit(EXIT_FAILURE, "IN_CORES not specified correctly");
526 if (output_cores_mask == 0) {
527 print_usage(prgname);
528 rte_exit(EXIT_FAILURE, "OUT_CORES not specified correctly");
530 if (ports_mask == 0) {
531 print_usage(prgname);
532 rte_exit(EXIT_FAILURE, "PORTMASK not specified correctly");
536 /* Initialise a single port on an Ethernet device */
538 init_port(uint8_t port)
542 /* Initialise device and RX/TX queues */
543 RTE_LOG(INFO, APP, "Initialising port %u ...\n", (unsigned)port);
545 ret = rte_eth_dev_configure(port, 1, 1, &port_conf);
547 rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)",
548 (unsigned)port, ret);
550 ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, rte_eth_dev_socket_id(port),
551 &rx_conf, pktmbuf_pool);
553 rte_exit(EXIT_FAILURE, "Could not setup up RX queue for "
554 "port%u (%d)", (unsigned)port, ret);
556 ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, rte_eth_dev_socket_id(port),
559 rte_exit(EXIT_FAILURE, "Could not setup up TX queue for "
560 "port%u (%d)", (unsigned)port, ret);
562 ret = rte_eth_dev_start(port);
564 rte_exit(EXIT_FAILURE, "Could not start port%u (%d)",
565 (unsigned)port, ret);
567 rte_eth_promiscuous_enable(port);
570 /* Check the link status of all ports in up to 9s, and print them finally */
572 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
574 #define CHECK_INTERVAL 100 /* 100ms */
575 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
576 uint8_t portid, count, all_ports_up, print_flag = 0;
577 struct rte_eth_link link;
579 printf("\nChecking link status");
581 for (count = 0; count <= MAX_CHECK_TIME; count++) {
583 for (portid = 0; portid < port_num; portid++) {
584 if ((port_mask & (1 << portid)) == 0)
586 memset(&link, 0, sizeof(link));
587 rte_eth_link_get_nowait(portid, &link);
588 /* print link status if flag set */
589 if (print_flag == 1) {
590 if (link.link_status)
591 printf("Port %d Link Up - speed %u "
592 "Mbps - %s\n", (uint8_t)portid,
593 (unsigned)link.link_speed,
594 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
595 ("full-duplex") : ("half-duplex\n"));
597 printf("Port %d Link Down\n",
601 /* clear all_ports_up flag if any link down */
602 if (link.link_status == 0) {
607 /* after finally printing all link status, get out */
611 if (all_ports_up == 0) {
614 rte_delay_ms(CHECK_INTERVAL);
617 /* set the print_flag if all ports up or timeout */
618 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
625 /* Callback for request of changing MTU */
627 kni_change_mtu(uint8_t port_id, unsigned new_mtu)
630 struct rte_eth_conf conf;
632 if (port_id >= rte_eth_dev_count()) {
633 RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
637 RTE_LOG(INFO, APP, "Change MTU of port %d to %u\n", port_id, new_mtu);
639 /* Stop specific port */
640 rte_eth_dev_stop(port_id);
642 memcpy(&conf, &port_conf, sizeof(conf));
644 if (new_mtu > ETHER_MAX_LEN)
645 conf.rxmode.jumbo_frame = 1;
647 conf.rxmode.jumbo_frame = 0;
649 /* mtu + length of header + length of FCS = max pkt length */
650 conf.rxmode.max_rx_pkt_len = new_mtu + KNI_ENET_HEADER_SIZE +
652 ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
654 RTE_LOG(ERR, APP, "Fail to reconfigure port %d\n", port_id);
658 /* Restart specific port */
659 ret = rte_eth_dev_start(port_id);
661 RTE_LOG(ERR, APP, "Fail to restart port %d\n", port_id);
668 /* Callback for request of configuring network interface up/down */
670 kni_config_network_interface(uint8_t port_id, uint8_t if_up)
674 if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
675 RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
679 RTE_LOG(INFO, APP, "Configure network interface of %d %s\n",
680 port_id, if_up ? "up" : "down");
682 if (if_up != 0) { /* Configure network interface up */
683 rte_eth_dev_stop(port_id);
684 ret = rte_eth_dev_start(port_id);
685 } else /* Configure network interface down */
686 rte_eth_dev_stop(port_id);
689 RTE_LOG(ERR, APP, "Failed to start port %d\n", port_id);
694 /* Initialise ports/queues etc. and start main loop on each core */
696 main(int argc, char** argv)
699 unsigned i, cfg_ports = 0;
700 uint8_t nb_sys_ports, port;
702 /* Associate signal_hanlder function with USR signals */
703 signal(SIGUSR1, signal_handler);
704 signal(SIGUSR2, signal_handler);
707 ret = rte_eal_init(argc, argv);
709 rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)", ret);
713 /* Parse application arguments (after the EAL ones) */
714 parse_args(argc, argv);
716 /* Create the mbuf pool */
717 pktmbuf_pool = rte_mempool_create("mbuf_pool", NB_MBUF, MBUF_SZ,
719 sizeof(struct rte_pktmbuf_pool_private),
720 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
722 if (pktmbuf_pool == NULL) {
723 rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool");
727 /* Initialise PMD driver(s) */
728 ret = rte_pmd_init_all();
730 rte_exit(EXIT_FAILURE, "Could not initialise PMD (%d)", ret);
732 /* Scan PCI bus for recognised devices */
733 ret = rte_eal_pci_probe();
735 rte_exit(EXIT_FAILURE, "Could not probe PCI (%d)", ret);
737 /* Get number of ports found in scan */
738 nb_sys_ports = rte_eth_dev_count();
739 if (nb_sys_ports == 0)
740 rte_exit(EXIT_FAILURE, "No supported Ethernet devices found - "
741 "check that CONFIG_RTE_LIBRTE_IGB_PMD=y and/or "
742 "CONFIG_RTE_LIBRTE_IXGBE_PMD=y in the config file");
743 /* Find the number of configured ports in the port mask */
744 for (i = 0; i < sizeof(ports_mask) * 8; i++)
745 cfg_ports += !! (ports_mask & (1 << i));
747 if (cfg_ports > nb_sys_ports)
748 rte_exit(EXIT_FAILURE, "Port mask requires more ports than "
751 if (kni_setup_port_affinities(nb_sys_ports) < 0)
752 rte_exit(EXIT_FAILURE, "Fail to setup port affinities\n");
754 /* Initialise each port */
755 for (port = 0; port < nb_sys_ports; port++) {
758 /* Skip ports that are not enabled */
759 if ((ports_mask & (1 << port)) == 0) {
764 if (port >= RTE_MAX_ETHPORTS)
765 rte_exit(EXIT_FAILURE, "Can not use more than "
766 "%d ports for kni\n", RTE_MAX_ETHPORTS);
768 kni = rte_kni_create(port, MAX_PACKET_SZ, pktmbuf_pool,
771 rte_exit(EXIT_FAILURE, "Fail to create kni dev "
772 "for port: %d\n", port);
773 kni_port_info[port].kni = kni;
775 check_all_ports_link_status(nb_sys_ports, ports_mask);
777 /* Launch per-lcore function on every lcore */
778 rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
779 RTE_LCORE_FOREACH_SLAVE(i) {
780 if (rte_eal_wait_lcore(i) < 0)