4 * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <sys/queue.h>
45 #include <netinet/in.h>
47 #include <linux/if_tun.h>
49 #include <sys/ioctl.h>
53 #include <rte_common.h>
55 #include <rte_memory.h>
56 #include <rte_memcpy.h>
57 #include <rte_memzone.h>
58 #include <rte_tailq.h>
60 #include <rte_per_lcore.h>
61 #include <rte_launch.h>
62 #include <rte_atomic.h>
63 #include <rte_lcore.h>
64 #include <rte_branch_prediction.h>
65 #include <rte_interrupts.h>
67 #include <rte_debug.h>
68 #include <rte_ether.h>
69 #include <rte_ethdev.h>
72 #include <rte_mempool.h>
74 #include <rte_string_fns.h>
75 #include <rte_cycles.h>
76 #include <rte_malloc.h>
79 /* Macros for printing using RTE_LOG */
80 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
82 /* NUMA socket to allocate mbuf pool on */
85 /* Max size of a single packet */
86 #define MAX_PACKET_SZ 2048
88 /* Number of bytes needed for each mbuf */
90 (MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
92 /* Number of mbufs in mempool that is created */
93 #define NB_MBUF (8192 * 16)
95 /* How many packets to attempt to read from NIC in one go */
96 #define PKT_BURST_SZ 32
98 /* How many objects (mbufs) to keep in per-lcore mempool cache */
99 #define MEMPOOL_CACHE_SZ PKT_BURST_SZ
101 /* Number of RX ring descriptors */
104 /* Number of TX ring descriptors */
107 /* Total octets in ethernet header */
108 #define KNI_ENET_HEADER_SIZE 14
110 /* Total octets in the FCS */
111 #define KNI_ENET_FCS_SIZE 4
114 * RX and TX Prefetch, Host, and Write-back threshold values should be
115 * carefully set for optimal performance. Consult the network
116 * controller's datasheet and supporting DPDK documentation for guidance
117 * on how these parameters should be set.
119 /* RX ring configuration */
120 static const struct rte_eth_rxconf rx_conf = {
122 .pthresh = 8, /* Ring prefetch threshold */
123 .hthresh = 8, /* Ring host threshold */
124 .wthresh = 4, /* Ring writeback threshold */
126 .rx_free_thresh = 0, /* Immediately free RX descriptors */
130 * These default values are optimized for use with the Intel(R) 82599 10 GbE
131 * Controller and the DPDK ixgbe PMD. Consider using other values for other
132 * network controllers and/or network drivers.
134 /* TX ring configuration */
135 static const struct rte_eth_txconf tx_conf = {
137 .pthresh = 36, /* Ring prefetch threshold */
138 .hthresh = 0, /* Ring host threshold */
139 .wthresh = 0, /* Ring writeback threshold */
141 .tx_free_thresh = 0, /* Use PMD default values */
142 .tx_rs_thresh = 0, /* Use PMD default values */
145 /* Options for configuring ethernet port */
146 static struct rte_eth_conf port_conf = {
148 .header_split = 0, /* Header Split disabled */
149 .hw_ip_checksum = 0, /* IP checksum offload disabled */
150 .hw_vlan_filter = 0, /* VLAN filtering disabled */
151 .jumbo_frame = 0, /* Jumbo Frame Support disabled */
152 .hw_strip_crc = 0, /* CRC stripping by hardware disabled */
155 .mq_mode = ETH_DCB_NONE,
159 /* Mempool for mbufs */
160 static struct rte_mempool * pktmbuf_pool = NULL;
162 /* Mask of enabled ports */
163 static uint32_t ports_mask = 0;
165 /* Mask of cores that read from NIC and write to tap */
166 static uint32_t input_cores_mask = 0;
168 /* Mask of cores that read from tap and write to NIC */
169 static uint32_t output_cores_mask = 0;
171 /* Structure type for recording kni interface specific stats */
172 struct kni_interface_stats {
173 /* number of pkts received from NIC, and sent to KNI */
176 /* number of pkts received from NIC, but failed to send to KNI */
179 /* number of pkts received from KNI, and sent to NIC */
182 /* number of pkts received from KNI, but failed to send to NIC */
186 /* Structure type for recording port specific information */
187 struct kni_port_info_t {
188 /* lcore id for ingress */
189 unsigned lcore_id_ingress;
191 /* lcore id for egress */
192 unsigned lcore_id_egress;
194 /* pointer to kni interface */
198 /* kni port specific information array*/
199 static struct kni_port_info_t kni_port_info[RTE_MAX_ETHPORTS];
201 /* kni device statistics array */
202 static struct kni_interface_stats kni_stats[RTE_MAX_ETHPORTS];
204 /* Get the pointer to kni interface */
205 static struct rte_kni * kni_lcore_to_kni(unsigned lcore_id);
207 static int kni_change_mtu(uint8_t port_id, unsigned new_mtu);
208 static int kni_config_network_interface(uint8_t port_id, uint8_t if_up);
210 static struct rte_kni_ops kni_ops = {
211 .change_mtu = kni_change_mtu,
212 .config_network_if = kni_config_network_interface,
215 /* Print out statistics on packets handled */
221 printf("\n**KNI example application statistics**\n"
222 "====== ============== ============ ============ ============ ============\n"
223 " Port Lcore(RX/TX) rx_packets rx_dropped tx_packets tx_dropped\n"
224 "------ -------------- ------------ ------------ ------------ ------------\n");
225 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
226 if (kni_port_info[i].kni == NULL)
229 printf("%7d %10u/%2u %13"PRIu64" %13"PRIu64" %13"PRIu64" "
231 kni_port_info[i].lcore_id_ingress,
232 kni_port_info[i].lcore_id_egress,
233 kni_stats[i].rx_packets,
234 kni_stats[i].rx_dropped,
235 kni_stats[i].tx_packets,
236 kni_stats[i].tx_dropped);
238 printf("====== ============== ============ ============ ============ ============\n");
241 /* Custom handling of signals to handle stats */
243 signal_handler(int signum)
245 /* When we receive a USR1 signal, print stats */
246 if (signum == SIGUSR1) {
250 /* When we receive a USR2 signal, reset stats */
251 if (signum == SIGUSR2) {
252 memset(&kni_stats, 0, sizeof(kni_stats));
253 printf("\n**Statistics have been reset**\n");
/**
 * Release a burst of mbufs.
 *
 * @param pkts
 *   Array of mbuf pointers to free. A NULL array is ignored.
 * @param num
 *   Number of leading entries of pkts to free.
 *
 * Every freed slot is overwritten with NULL so the caller's burst array
 * does not keep pointers to buffers that have been released.
 */
static void
kni_burst_free_mbufs(struct rte_mbuf **pkts, unsigned num)
{
	unsigned i;

	if (pkts == NULL)
		return;

	for (i = 0; i < num; i++) {
		rte_pktmbuf_free(pkts[i]);
		pkts[i] = NULL;
	}
}
273 * Interface to burst rx and enqueue mbufs into rx_q
276 kni_ingress(struct rte_kni *kni)
278 uint8_t port_id = rte_kni_get_port_id(kni);
280 struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
282 if (kni == NULL || port_id >= RTE_MAX_ETHPORTS)
285 /* Burst rx from eth */
286 nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
287 if (nb_rx > PKT_BURST_SZ) {
288 RTE_LOG(ERR, APP, "Error receiving from eth\n");
292 /* Burst tx to kni */
293 num = rte_kni_tx_burst(kni, pkts_burst, nb_rx);
294 kni_stats[port_id].rx_packets += num;
296 if (unlikely(num < nb_rx)) {
297 /* Free mbufs not tx to kni interface */
298 kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
299 kni_stats[port_id].rx_dropped += nb_rx - num;
304 * Interface to dequeue mbufs from tx_q and burst tx
307 kni_egress(struct rte_kni *kni)
309 uint8_t port_id = rte_kni_get_port_id(kni);;
311 struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
313 if (kni == NULL || port_id >= RTE_MAX_ETHPORTS)
316 /* Burst rx from kni */
317 num = rte_kni_rx_burst(kni, pkts_burst, PKT_BURST_SZ);
318 if (num > PKT_BURST_SZ) {
319 RTE_LOG(ERR, APP, "Error receiving from KNI\n");
323 /* Burst tx to eth */
324 nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
325 kni_stats[port_id].tx_packets += nb_tx;
327 if (unlikely(nb_tx < num)) {
328 /* Free mbufs not tx to NIC */
329 kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
330 kni_stats[port_id].tx_dropped += num - nb_tx;
334 /* Main processing loop */
335 static __attribute__((noreturn)) int
336 main_loop(__rte_unused void *arg)
339 const unsigned lcore_id = rte_lcore_id();
340 struct rte_kni *kni = kni_lcore_to_kni(lcore_id);
343 RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id);
345 ; /* loop doing nothing */
347 pid = rte_kni_get_port_id(kni);
348 if (pid >= RTE_MAX_ETHPORTS)
349 rte_exit(EXIT_FAILURE, "Failure: port id >= %d\n",
352 if (kni_port_info[pid].lcore_id_ingress == lcore_id) {
353 /* Running on lcores for input packets */
354 RTE_LOG(INFO, APP, "Lcore %u is reading from "
355 "port %d\n", lcore_id, pid);
361 } else if (kni_port_info[pid].lcore_id_egress == lcore_id) {
362 /* Running on lcores for output packets */
363 RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
371 RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n",
374 ; /* loop doing nothing */
379 /* Display usage instructions */
381 print_usage(const char *prgname)
383 RTE_LOG(INFO, APP, "\nUsage: %s [EAL options] -- -p PORTMASK "
384 "-i IN_CORES -o OUT_CORES\n"
385 " -p PORTMASK: hex bitmask of ports to use\n"
386 " -i IN_CORES: hex bitmask of cores which read "
388 " -o OUT_CORES: hex bitmask of cores which write to NIC\n",
/**
 * Convert a hexadecimal string to an unsigned 32-bit number.
 *
 * @param portmask
 *   NUL-terminated string holding a hexadecimal value (parsed by
 *   strtoul() with base 16).
 * @return
 *   The parsed value, or 0 if the string is NULL, empty, contains
 *   trailing characters that are not part of the number, or encodes a
 *   value that does not fit in 32 bits.
 */
static uint32_t
parse_unsigned(const char *portmask)
{
	char *end = NULL;
	unsigned long num;

	if (portmask == NULL || portmask[0] == '\0')
		return 0;

	num = strtoul(portmask, &end, 16);
	if (end == NULL || *end != '\0')
		return 0;

	/* Report an error instead of silently truncating to 32 bits */
	if (num > UINT32_MAX)
		return 0;

	return (uint32_t)num;
}
407 kni_setup_port_affinities(uint8_t nb_port)
410 uint32_t in_lcore, out_lcore;
411 uint8_t rx_port = 0, tx_port = 0;
414 if (nb_port > RTE_MAX_ETHPORTS) {
415 RTE_LOG(ERR, APP, "The number of ports exceeds the maximum "
416 "number of 0x%x\n", RTE_MAX_ETHPORTS);
420 RTE_LCORE_FOREACH(i) {
421 in_lcore = input_cores_mask & (1 << i);
422 out_lcore = output_cores_mask & (1 << i);
424 /* Check if it is in input lcore or output lcore mask */
425 if (in_lcore == 0 && out_lcore == 0)
428 /* Check if it is in both input lcore and output lcore mask */
429 if (in_lcore != 0 && out_lcore != 0) {
430 RTE_LOG(ERR, APP, "Lcore 0x%x can not be used in both "
431 "input lcore and output lcore mask\n", i);
435 /* Check if the lcore is enabled or not */
436 if (rte_lcore_is_enabled(i) == 0) {
437 RTE_LOG(ERR, APP, "Lcore 0x%x is not enabled\n", i);
442 /* It is be for packet receiving */
443 while ((rx_port < nb_port) &&
444 ((ports_mask & (1 << rx_port)) == 0))
447 if (rx_port >= nb_port) {
448 RTE_LOG(ERR, APP, "There is no enough ports "
449 "for ingress lcores\n");
452 kni_port_info[rx_port].lcore_id_ingress = i;
455 /* It is for packet transmitting */
456 while ((tx_port < nb_port) &&
457 ((ports_mask & (1 << tx_port)) == 0))
460 if (tx_port >= nb_port) {
461 RTE_LOG(ERR, APP, "There is no enough ports "
462 "for engree lcores\n");
465 kni_port_info[tx_port].lcore_id_egress = i;
470 /* Display all the port/lcore affinity */
471 for (pid = 0; pid < nb_port; pid++) {
472 RTE_LOG(INFO, APP, "Port%d, ingress lcore id: %u, "
473 "egress lcore id: %u\n", pid,
474 kni_port_info[pid].lcore_id_ingress,
475 kni_port_info[pid].lcore_id_egress);
481 static struct rte_kni *
482 kni_lcore_to_kni(unsigned lcore_id)
485 struct kni_port_info_t *p = kni_port_info;
487 for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
488 if (p[pid].kni != NULL && (p[pid].lcore_id_ingress == lcore_id
489 || p[pid].lcore_id_egress == lcore_id))
496 /* Parse the arguments given in the command line of the application */
498 parse_args(int argc, char **argv)
501 const char *prgname = argv[0];
503 /* Disable printing messages within getopt() */
506 /* Parse command line */
507 while ((opt = getopt(argc, argv, "i:o:p:")) != EOF) {
510 input_cores_mask = parse_unsigned(optarg);
513 output_cores_mask = parse_unsigned(optarg);
516 ports_mask = parse_unsigned(optarg);
519 print_usage(prgname);
520 rte_exit(EXIT_FAILURE, "Invalid option specified");
524 /* Check that options were parsed ok */
525 if (input_cores_mask == 0) {
526 print_usage(prgname);
527 rte_exit(EXIT_FAILURE, "IN_CORES not specified correctly");
529 if (output_cores_mask == 0) {
530 print_usage(prgname);
531 rte_exit(EXIT_FAILURE, "OUT_CORES not specified correctly");
533 if (ports_mask == 0) {
534 print_usage(prgname);
535 rte_exit(EXIT_FAILURE, "PORTMASK not specified correctly");
539 /* Initialise a single port on an Ethernet device */
541 init_port(uint8_t port)
545 /* Initialise device and RX/TX queues */
546 RTE_LOG(INFO, APP, "Initialising port %u ...\n", (unsigned)port);
548 ret = rte_eth_dev_configure(port, 1, 1, &port_conf);
550 rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)",
551 (unsigned)port, ret);
553 ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, SOCKET, &rx_conf,
556 rte_exit(EXIT_FAILURE, "Could not setup up RX queue for "
557 "port%u (%d)", (unsigned)port, ret);
559 ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, SOCKET, &tx_conf);
561 rte_exit(EXIT_FAILURE, "Could not setup up TX queue for "
562 "port%u (%d)", (unsigned)port, ret);
564 ret = rte_eth_dev_start(port);
566 rte_exit(EXIT_FAILURE, "Could not start port%u (%d)",
567 (unsigned)port, ret);
569 rte_eth_promiscuous_enable(port);
572 /* Check the link status of all ports in up to 9s, and print them finally */
574 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
576 #define CHECK_INTERVAL 100 /* 100ms */
577 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
578 uint8_t portid, count, all_ports_up, print_flag = 0;
579 struct rte_eth_link link;
581 printf("\nChecking link status");
583 for (count = 0; count <= MAX_CHECK_TIME; count++) {
585 for (portid = 0; portid < port_num; portid++) {
586 if ((port_mask & (1 << portid)) == 0)
588 memset(&link, 0, sizeof(link));
589 rte_eth_link_get_nowait(portid, &link);
590 /* print link status if flag set */
591 if (print_flag == 1) {
592 if (link.link_status)
593 printf("Port %d Link Up - speed %u "
594 "Mbps - %s\n", (uint8_t)portid,
595 (unsigned)link.link_speed,
596 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
597 ("full-duplex") : ("half-duplex\n"));
599 printf("Port %d Link Down\n",
603 /* clear all_ports_up flag if any link down */
604 if (link.link_status == 0) {
609 /* after finally printing all link status, get out */
613 if (all_ports_up == 0) {
616 rte_delay_ms(CHECK_INTERVAL);
619 /* set the print_flag if all ports up or timeout */
620 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
627 /* Callback for request of changing MTU */
629 kni_change_mtu(uint8_t port_id, unsigned new_mtu)
632 struct rte_eth_conf conf;
634 if (port_id >= rte_eth_dev_count()) {
635 RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
639 RTE_LOG(INFO, APP, "Change MTU of port %d to %u\n", port_id, new_mtu);
641 /* Stop specific port */
642 rte_eth_dev_stop(port_id);
644 memcpy(&conf, &port_conf, sizeof(conf));
646 if (new_mtu > ETHER_MAX_LEN)
647 conf.rxmode.jumbo_frame = 1;
649 conf.rxmode.jumbo_frame = 0;
651 /* mtu + length of header + length of FCS = max pkt length */
652 conf.rxmode.max_rx_pkt_len = new_mtu + KNI_ENET_HEADER_SIZE +
654 ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
656 RTE_LOG(ERR, APP, "Fail to reconfigure port %d\n", port_id);
660 /* Restart specific port */
661 ret = rte_eth_dev_start(port_id);
663 RTE_LOG(ERR, APP, "Fail to restart port %d\n", port_id);
670 /* Callback for request of configuring network interface up/down */
672 kni_config_network_interface(uint8_t port_id, uint8_t if_up)
676 if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
677 RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
681 RTE_LOG(INFO, APP, "Configure network interface of %d %s\n",
682 port_id, if_up ? "up" : "down");
684 if (if_up != 0) { /* Configure network interface up */
685 rte_eth_dev_stop(port_id);
686 ret = rte_eth_dev_start(port_id);
687 } else /* Configure network interface down */
688 rte_eth_dev_stop(port_id);
691 RTE_LOG(ERR, APP, "Failed to start port %d\n", port_id);
696 /* Initialise ports/queues etc. and start main loop on each core */
698 main(int argc, char** argv)
701 unsigned i, cfg_ports = 0;
702 uint8_t nb_sys_ports, port;
704 /* Associate signal_handler function with USR signals */
705 signal(SIGUSR1, signal_handler);
706 signal(SIGUSR2, signal_handler);
709 ret = rte_eal_init(argc, argv);
711 rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)", ret);
715 /* Parse application arguments (after the EAL ones) */
716 parse_args(argc, argv);
718 /* Create the mbuf pool */
719 pktmbuf_pool = rte_mempool_create("mbuf_pool", NB_MBUF, MBUF_SZ,
721 sizeof(struct rte_pktmbuf_pool_private),
722 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
724 if (pktmbuf_pool == NULL) {
725 rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool");
729 /* Initialise PMD driver(s) */
730 ret = rte_pmd_init_all();
732 rte_exit(EXIT_FAILURE, "Could not initialise PMD (%d)", ret);
734 /* Scan PCI bus for recognised devices */
735 ret = rte_eal_pci_probe();
737 rte_exit(EXIT_FAILURE, "Could not probe PCI (%d)", ret);
739 /* Get number of ports found in scan */
740 nb_sys_ports = rte_eth_dev_count();
741 if (nb_sys_ports == 0)
742 rte_exit(EXIT_FAILURE, "No supported Ethernet devices found - "
743 "check that CONFIG_RTE_LIBRTE_IGB_PMD=y and/or "
744 "CONFIG_RTE_LIBRTE_IXGBE_PMD=y in the config file");
745 /* Find the number of configured ports in the port mask */
746 for (i = 0; i < sizeof(ports_mask) * 8; i++)
747 cfg_ports += !! (ports_mask & (1 << i));
749 if (cfg_ports > nb_sys_ports)
750 rte_exit(EXIT_FAILURE, "Port mask requires more ports than "
753 if (kni_setup_port_affinities(nb_sys_ports) < 0)
754 rte_exit(EXIT_FAILURE, "Fail to setup port affinities\n");
756 /* Initialise each port */
757 for (port = 0; port < nb_sys_ports; port++) {
760 /* Skip ports that are not enabled */
761 if ((ports_mask & (1 << port)) == 0) {
766 if (port >= RTE_MAX_ETHPORTS)
767 rte_exit(EXIT_FAILURE, "Can not use more than "
768 "%d ports for kni\n", RTE_MAX_ETHPORTS);
770 kni = rte_kni_create(port, MAX_PACKET_SZ, pktmbuf_pool,
773 rte_exit(EXIT_FAILURE, "Fail to create kni dev "
774 "for port: %d\n", port);
775 kni_port_info[port].kni = kni;
777 check_all_ports_link_status(nb_sys_ports, ports_mask);
779 /* Launch per-lcore function on every lcore */
780 rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
781 RTE_LCORE_FOREACH_SLAVE(i) {
782 if (rte_eal_wait_lcore(i) < 0)