/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <getopt.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_ring.h>
#include <rte_reorder.h>
#define RX_DESC_PER_QUEUE 128
#define TX_DESC_PER_QUEUE 512

#define MAX_PKTS_BURST 32
#define REORDER_BUFFER_SIZE 8192
#define MBUF_PER_POOL 65535
#define MBUF_POOL_CACHE_SIZE 250

#define RING_SIZE 16384

/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_REORDERAPP          RTE_LOGTYPE_USER1
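
/*
 * Pipeline layout (one lcore per stage):
 *   rx_thread --rx_to_workers ring--> worker_thread(s)
 *            --workers_to_tx ring--> send_thread (reordering) or tx_thread.
 * The RX thread stamps each mbuf with a sequence number; the TX side
 * restores that order via the rte_reorder library unless --disable-reorder
 * is given.
 */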
unsigned int portmask;
unsigned int disable_reorder;
volatile uint8_t quit_signal;

static struct rte_mempool *mbuf_pool;

static struct rte_eth_conf port_conf_default;
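
/*
 * Note: port_conf_default has static storage duration and is therefore
 * zero-initialized, so rte_eth_dev_configure() runs the ports with the
 * driver's default RX/TX settings.
 */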
struct worker_thread_args {
	struct rte_ring *ring_in;
	struct rte_ring *ring_out;
};

struct send_thread_args {
	struct rte_ring *ring_in;
	struct rte_reorder_buffer *buffer;
};
volatile struct app_stats {
	struct {
		uint64_t rx_pkts;
		uint64_t enqueue_pkts;
		uint64_t enqueue_failed_pkts;
	} rx __rte_cache_aligned;

	struct {
		uint64_t dequeue_pkts;
		uint64_t enqueue_pkts;
		uint64_t enqueue_failed_pkts;
	} wkr __rte_cache_aligned;

	struct {
		uint64_t dequeue_pkts;
		/* Too early pkts transmitted directly w/o reordering */
		uint64_t early_pkts_txtd_woro;
		/* Too early pkts failed from direct transmit */
		uint64_t early_pkts_tx_failed_woro;
		uint64_t ro_tx_pkts;
		uint64_t ro_tx_failed_pkts;
	} tx __rte_cache_aligned;
} app_stats;
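
/*
 * Each stage keeps its counters in its own cache-line-aligned sub-struct
 * (__rte_cache_aligned), so the RX, worker and TX lcores do not falsely
 * share a cache line when updating their respective statistics.
 */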
/**
 * Get the last enabled lcore ID
 *
 * @return
 *   The last enabled lcore ID.
 */
static unsigned int
get_last_lcore_id(void)
{
	int i;

	for (i = RTE_MAX_LCORE - 1; i >= 0; i--)
		if (rte_lcore_is_enabled(i))
			return i;
	return 0;
}

/**
 * Get the previous enabled lcore ID
 *
 * @param id
 *   The current lcore ID.
 * @return
 *   The previous enabled lcore ID, or the current lcore
 *   ID if it is the first available core.
 */
static unsigned int
get_previous_lcore_id(unsigned int id)
{
	int i;

	for (i = id - 1; i >= 0; i--)
		if (rte_lcore_is_enabled(i))
			return i;
	return id;
}
static inline void
pktmbuf_free_bulk(struct rte_mbuf *mbuf_table[], unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++)
		rte_pktmbuf_free(mbuf_table[i]);
}
/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
			"  -p PORTMASK: hexadecimal bitmask of ports to configure\n",
			prgname);
}
static int
parse_portmask(const char *portmask)
{
	unsigned long pm;
	char *end = NULL;

	/* parse hexadecimal string; return 0 on failure so the caller's
	 * "portmask == 0" check catches invalid input */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	return pm;
}
/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt;
	int option_index;
	char **argvopt;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{"disable-reorder", 0, 0, 0},
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:",
					lgopts, &option_index)) != EOF) {
		switch (opt) {
		/* portmask */
		case 'p':
			portmask = parse_portmask(optarg);
			if (portmask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;

		/* long options */
		case 0:
			if (!strcmp(lgopts[option_index].name, "disable-reorder")) {
				printf("reorder disabled\n");
				disable_reorder = 1;
			}
			break;

		default:
			print_usage(prgname);
			return -1;
		}
	}
	if (optind <= 1) {
		print_usage(prgname);
		return -1;
	}

	argv[optind-1] = prgname;
	optind = 1; /* reset getopt lib */
	return 0;
}
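
/*
 * Example invocation (illustrative; the binary name depends on the build):
 *   ./packet_ordering [EAL options] -- -p 0x3
 * runs the app on ports 0 and 1 with reordering enabled, while
 *   ./packet_ordering [EAL options] -- -p 0x3 --disable-reorder
 * switches the TX stage to plain buffered forwarding.
 */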
/*
 * Tx buffer error callback
 */
static void
flush_tx_error_callback(struct rte_mbuf **unsent, uint16_t count,
		void *userdata __rte_unused) {

	/* free the mbufs which failed from transmit */
	app_stats.tx.ro_tx_failed_pkts += count;
	RTE_LOG_DP(DEBUG, REORDERAPP, "%s:Packet loss with tx_burst\n", __func__);
	pktmbuf_free_bulk(unsent, count);
}
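
/*
 * The callback above is invoked by the ethdev TX buffering API whenever
 * rte_eth_tx_buffer()/rte_eth_tx_buffer_flush() cannot send all buffered
 * packets; without it, the default behaviour is simply to drop them.
 * Here the failures are counted before the mbufs are freed.
 */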
static inline int
free_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[]) {
	const uint8_t nb_ports = rte_eth_dev_count();
	unsigned int port_id;

	/* free the TX buffers of all enabled ports */
	for (port_id = 0; port_id < nb_ports; port_id++) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0)
			continue;

		rte_free(tx_buffer[port_id]);
	}
	return 0;
}
static inline int
configure_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[])
{
	const uint8_t nb_ports = rte_eth_dev_count();
	unsigned int port_id;
	int ret;

	/* initialize buffers for all enabled ports */
	for (port_id = 0; port_id < nb_ports; port_id++) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0)
			continue;

		/* Initialize TX buffers */
		tx_buffer[port_id] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKTS_BURST), 0,
				rte_eth_dev_socket_id(port_id));
		if (tx_buffer[port_id] == NULL)
			rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
					(unsigned int) port_id);

		rte_eth_tx_buffer_init(tx_buffer[port_id], MAX_PKTS_BURST);

		ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[port_id],
				flush_tx_error_callback, NULL);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot set error callback for "
					"tx buffer on port %u\n", (unsigned int) port_id);
	}
	return 0;
}
static int
configure_eth_port(uint8_t port_id)
{
	struct ether_addr addr;
	const uint16_t rxRings = 1, txRings = 1;
	const uint8_t nb_ports = rte_eth_dev_count();
	int ret;
	uint16_t q;
	uint16_t nb_rxd = RX_DESC_PER_QUEUE;
	uint16_t nb_txd = TX_DESC_PER_QUEUE;

	/* valid port IDs are 0..nb_ports-1 */
	if (port_id >= nb_ports)
		return -1;

	ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default);
	if (ret != 0)
		return ret;

	ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
	if (ret != 0)
		return ret;

	for (q = 0; q < rxRings; q++) {
		ret = rte_eth_rx_queue_setup(port_id, q, nb_rxd,
				rte_eth_dev_socket_id(port_id), NULL,
				mbuf_pool);
		if (ret < 0)
			return ret;
	}

	for (q = 0; q < txRings; q++) {
		ret = rte_eth_tx_queue_setup(port_id, q, nb_txd,
				rte_eth_dev_socket_id(port_id), NULL);
		if (ret < 0)
			return ret;
	}

	ret = rte_eth_dev_start(port_id);
	if (ret < 0)
		return ret;

	rte_eth_macaddr_get(port_id, &addr);
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned int)port_id,
			addr.addr_bytes[0], addr.addr_bytes[1],
			addr.addr_bytes[2], addr.addr_bytes[3],
			addr.addr_bytes[4], addr.addr_bytes[5]);

	rte_eth_promiscuous_enable(port_id);

	return 0;
}
static void
print_stats(void)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	unsigned int i;
	struct rte_eth_stats eth_stats;

	printf("\nRX thread stats:\n");
	printf(" - Pkts rxd:                     %"PRIu64"\n",
			app_stats.rx.rx_pkts);
	printf(" - Pkts enqd to workers ring:    %"PRIu64"\n",
			app_stats.rx.enqueue_pkts);

	printf("\nWorker thread stats:\n");
	printf(" - Pkts deqd from workers ring:  %"PRIu64"\n",
			app_stats.wkr.dequeue_pkts);
	printf(" - Pkts enqd to tx ring:         %"PRIu64"\n",
			app_stats.wkr.enqueue_pkts);
	printf(" - Pkts enq to tx failed:        %"PRIu64"\n",
			app_stats.wkr.enqueue_failed_pkts);

	printf("\nTX stats:\n");
	printf(" - Pkts deqd from tx ring:       %"PRIu64"\n",
			app_stats.tx.dequeue_pkts);
	printf(" - Ro Pkts transmitted:          %"PRIu64"\n",
			app_stats.tx.ro_tx_pkts);
	printf(" - Ro Pkts tx failed:            %"PRIu64"\n",
			app_stats.tx.ro_tx_failed_pkts);
	printf(" - Pkts transmitted w/o reorder: %"PRIu64"\n",
			app_stats.tx.early_pkts_txtd_woro);
	printf(" - Pkts tx failed w/o reorder:   %"PRIu64"\n",
			app_stats.tx.early_pkts_tx_failed_woro);

	for (i = 0; i < nb_ports; i++) {
		rte_eth_stats_get(i, &eth_stats);
		printf("\nPort %u stats:\n", i);
		printf(" - Pkts in:   %"PRIu64"\n", eth_stats.ipackets);
		printf(" - Pkts out:  %"PRIu64"\n", eth_stats.opackets);
		printf(" - In Errs:   %"PRIu64"\n", eth_stats.ierrors);
		printf(" - Out Errs:  %"PRIu64"\n", eth_stats.oerrors);
		printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
	}
}
static void
int_handler(int sig_num)
{
	printf("Exiting on signal %d\n", sig_num);
	quit_signal = 1;
}
/**
 * This thread receives mbufs from the port and assigns them an internal
 * sequence number (mbuf->seqn) to keep track of their order of arrival.
 * The mbufs are then passed to the worker threads via the rx_to_workers
 * ring.
 */
static int
rx_thread(struct rte_ring *ring_out)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	uint32_t seqn = 0;
	uint16_t i, ret = 0;
	uint16_t nb_rx_pkts;
	uint8_t port_id;
	struct rte_mbuf *pkts[MAX_PKTS_BURST];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	while (!quit_signal) {

		for (port_id = 0; port_id < nb_ports; port_id++) {
			if ((portmask & (1 << port_id)) != 0) {

				/* receive packets */
				nb_rx_pkts = rte_eth_rx_burst(port_id, 0,
						pkts, MAX_PKTS_BURST);
				if (nb_rx_pkts == 0) {
					RTE_LOG_DP(DEBUG, REORDERAPP,
						"%s():Received zero packets\n", __func__);
					continue;
				}
				app_stats.rx.rx_pkts += nb_rx_pkts;

				/* mark sequence number */
				for (i = 0; i < nb_rx_pkts; )
					pkts[i++]->seqn = seqn++;
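
				/*
				 * The seqn stamped here is what
				 * rte_reorder_insert() later uses to place
				 * each mbuf back into its arrival slot.
				 */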
				/* enqueue to rx_to_workers ring */
				ret = rte_ring_enqueue_burst(ring_out,
						(void *)pkts, nb_rx_pkts, NULL);
				app_stats.rx.enqueue_pkts += ret;
				if (unlikely(ret < nb_rx_pkts)) {
					app_stats.rx.enqueue_failed_pkts +=
							(nb_rx_pkts - ret);
					pktmbuf_free_bulk(&pkts[ret], nb_rx_pkts - ret);
				}
			}
		}
	}
	return 0;
}
/**
 * This thread takes bursts of packets from the rx_to_workers ring,
 * changes each packet's input port value to its output port value,
 * and feeds the packets to the workers_to_tx ring.
 */
static int
worker_thread(void *args_ptr)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	uint16_t i, ret = 0;
	uint16_t burst_size = 0;
	struct worker_thread_args *args;
	struct rte_mbuf *burst_buffer[MAX_PKTS_BURST] = { NULL };
	struct rte_ring *ring_in, *ring_out;
	const unsigned int xor_val = (nb_ports > 1);
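
	/*
	 * With more than one enabled port, XOR-ing the port number with 1
	 * swaps each even/odd port pair (0<->1, 2<->3, ...), so packets go
	 * out of the paired port; with a single port, xor_val is 0 and
	 * packets are echoed back out of the port they arrived on.
	 */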
	args = (struct worker_thread_args *) args_ptr;
	ring_in  = args->ring_in;
	ring_out = args->ring_out;

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	while (!quit_signal) {

		/* dequeue the mbufs from rx_to_workers ring */
		burst_size = rte_ring_dequeue_burst(ring_in,
				(void *)burst_buffer, MAX_PKTS_BURST, NULL);
		if (unlikely(burst_size == 0))
			continue;

		__sync_fetch_and_add(&app_stats.wkr.dequeue_pkts, burst_size);
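
		/*
		 * Worker counters are updated with atomic adds because every
		 * worker lcore shares the same wkr statistics block; the RX
		 * and TX counters have a single writer and need no atomics.
		 */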
		/* just do some operation on mbuf */
		for (i = 0; i < burst_size;)
			burst_buffer[i++]->port ^= xor_val;

		/* enqueue the modified mbufs to workers_to_tx ring */
		ret = rte_ring_enqueue_burst(ring_out, (void *)burst_buffer,
				burst_size, NULL);
		__sync_fetch_and_add(&app_stats.wkr.enqueue_pkts, ret);
		if (unlikely(ret < burst_size)) {
			/* Return the mbufs to their respective pool, dropping packets */
			__sync_fetch_and_add(&app_stats.wkr.enqueue_failed_pkts,
					(int)burst_size - ret);
			pktmbuf_free_bulk(&burst_buffer[ret], burst_size - ret);
		}
	}
	return 0;
}
/**
 * Dequeue mbufs from the workers_to_tx ring and reorder them before
 * transmitting.
 */
static int
send_thread(struct send_thread_args *args)
{
	int ret;
	unsigned int i, dret;
	uint16_t nb_dq_mbufs;
	uint8_t outp;
	unsigned int sent;
	struct rte_mbuf *mbufs[MAX_PKTS_BURST];
	struct rte_mbuf *rombufs[MAX_PKTS_BURST] = {NULL};
	static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__, rte_lcore_id());

	configure_tx_buffers(tx_buffer);

	while (!quit_signal) {

		/* dequeue the mbufs from workers_to_tx ring */
		nb_dq_mbufs = rte_ring_dequeue_burst(args->ring_in,
				(void *)mbufs, MAX_PKTS_BURST, NULL);

		if (unlikely(nb_dq_mbufs == 0))
			continue;

		app_stats.tx.dequeue_pkts += nb_dq_mbufs;

		for (i = 0; i < nb_dq_mbufs; i++) {
			/* send dequeued mbufs for reordering */
			ret = rte_reorder_insert(args->buffer, mbufs[i]);
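
			/*
			 * rte_reorder_insert() returns 0 once the mbuf is
			 * buffered in its seqn slot, and -1 with rte_errno
			 * set otherwise: ERANGE for an "early" mbuf too far
			 * ahead of the reorder window, ENOSPC when the
			 * window would first have to be drained to make room.
			 */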
			if (ret == -1 && rte_errno == ERANGE) {
				/* Too early pkts should be transmitted out directly */
				RTE_LOG_DP(DEBUG, REORDERAPP,
						"%s():Cannot reorder early packet "
						"direct enqueuing to TX\n", __func__);
				outp = mbufs[i]->port;
				if ((portmask & (1 << outp)) == 0) {
					rte_pktmbuf_free(mbufs[i]);
					continue;
				}
				/* pass a pointer to the mbuf pointer here;
				 * casting the mbuf itself to void * was a bug */
				if (rte_eth_tx_burst(outp, 0, &mbufs[i], 1) != 1) {
					rte_pktmbuf_free(mbufs[i]);
					app_stats.tx.early_pkts_tx_failed_woro++;
				} else
					app_stats.tx.early_pkts_txtd_woro++;
			} else if (ret == -1 && rte_errno == ENOSPC) {
				/*
				 * Early pkts just outside of window should be dropped
				 */
				rte_pktmbuf_free(mbufs[i]);
			}
		}
		/*
		 * drain MAX_PKTS_BURST of reordered
		 * mbufs for transmit
		 */
		dret = rte_reorder_drain(args->buffer, rombufs, MAX_PKTS_BURST);
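
		/*
		 * rte_reorder_drain() hands back up to MAX_PKTS_BURST mbufs
		 * in sequence-number order and moves the reorder window
		 * forward past them.
		 */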
		for (i = 0; i < dret; i++) {

			struct rte_eth_dev_tx_buffer *outbuf;
			uint8_t outp1;

			outp1 = rombufs[i]->port;
			/* skip ports that are not enabled */
			if ((portmask & (1 << outp1)) == 0) {
				rte_pktmbuf_free(rombufs[i]);
				continue;
			}

			outbuf = tx_buffer[outp1];
			sent = rte_eth_tx_buffer(outp1, 0, outbuf, rombufs[i]);
			if (sent)
				app_stats.tx.ro_tx_pkts += sent;
		}
	}

	free_tx_buffers(tx_buffer);

	return 0;
}
/**
 * Dequeue mbufs from the workers_to_tx ring and transmit them.
 */
static int
tx_thread(struct rte_ring *ring_in)
{
	uint32_t i, dqnum;
	uint8_t outp;
	unsigned int sent;
	struct rte_mbuf *mbufs[MAX_PKTS_BURST];
	struct rte_eth_dev_tx_buffer *outbuf;
	static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	configure_tx_buffers(tx_buffer);

	while (!quit_signal) {

		/* dequeue the mbufs from workers_to_tx ring */
		dqnum = rte_ring_dequeue_burst(ring_in,
				(void *)mbufs, MAX_PKTS_BURST, NULL);

		if (unlikely(dqnum == 0))
			continue;

		app_stats.tx.dequeue_pkts += dqnum;

		for (i = 0; i < dqnum; i++) {
			outp = mbufs[i]->port;
			/* skip ports that are not enabled */
			if ((portmask & (1 << outp)) == 0) {
				rte_pktmbuf_free(mbufs[i]);
				continue;
			}

			outbuf = tx_buffer[outp];
			sent = rte_eth_tx_buffer(outp, 0, outbuf, mbufs[i]);
			if (sent)
				app_stats.tx.ro_tx_pkts += sent;
		}
	}

	return 0;
}
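
/*
 * Note: rte_eth_tx_buffer() only queues packets, transmitting once
 * MAX_PKTS_BURST of them have accumulated per port. A latency-sensitive
 * variant of these TX loops might also call rte_eth_tx_buffer_flush()
 * periodically (e.g. on an idle iteration) so that stragglers are not
 * held back; as written, the example relies on fresh traffic to push
 * them out.
 */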
int
main(int argc, char **argv)
{
	int ret;
	unsigned int nb_ports;
	unsigned int lcore_id, last_lcore_id, master_lcore_id;
	uint8_t port_id;
	uint8_t nb_ports_available;
	struct worker_thread_args worker_args = {NULL, NULL};
	struct send_thread_args send_args = {NULL, NULL};
	struct rte_ring *rx_to_workers;
	struct rte_ring *workers_to_tx;

	/* catch ctrl-c so we can print on exit */
	signal(SIGINT, int_handler);
	/* Initialize EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		return -1;

	argc -= ret;
	argv += ret;

	/* Parse the application specific arguments */
	ret = parse_args(argc, argv);
	if (ret < 0)
		return -1;

	/* Check if we have enough cores */
	if (rte_lcore_count() < 3)
		rte_exit(EXIT_FAILURE, "Error, this application needs at "
				"least 3 logical cores to run:\n"
				"1 lcore for packet RX\n"
				"1 lcore for packet TX\n"
				"and at least 1 lcore for worker threads\n");
	nb_ports = rte_eth_dev_count();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
	if (nb_ports != 1 && (nb_ports & 1))
		rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
				"when using a single port\n");

	mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL,
			MBUF_POOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
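
	/*
	 * The pool just created holds MBUF_PER_POOL = 65535 (2^16 - 1)
	 * mbufs: per the rte_mempool documentation, a power-of-two-minus-one
	 * element count is the most memory-efficient sizing.
	 */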
	nb_ports_available = nb_ports;

	/* initialize all ports */
	for (port_id = 0; port_id < nb_ports; port_id++) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0) {
			printf("\nSkipping disabled port %d\n", port_id);
			nb_ports_available--;
			continue;
		}
		/* init port */
		printf("Initializing port %u... done\n", (unsigned int) port_id);

		if (configure_eth_port(port_id) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
					port_id);
	}

	if (!nb_ports_available) {
		rte_exit(EXIT_FAILURE,
				"All available ports are disabled. Please set portmask.\n");
	}
	/* Create rings for inter core communication */
	rx_to_workers = rte_ring_create("rx_to_workers", RING_SIZE, rte_socket_id(),
			RING_F_SP_ENQ);
	if (rx_to_workers == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));

	workers_to_tx = rte_ring_create("workers_to_tx", RING_SIZE, rte_socket_id(),
			RING_F_SC_DEQ);
	if (workers_to_tx == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
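
	/*
	 * rx_to_workers is single-producer (only the RX lcore enqueues) and
	 * workers_to_tx is single-consumer (only the TX lcore dequeues); the
	 * other side of each ring stays multi-producer/multi-consumer since
	 * any number of worker lcores touch it.
	 */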
	if (!disable_reorder) {
		/* REORDER_BUFFER_SIZE is 8192, a power of two as the
		 * reorder library requires */
		send_args.buffer = rte_reorder_create("PKT_RO", rte_socket_id(),
				REORDER_BUFFER_SIZE);
		if (send_args.buffer == NULL)
			rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
	}
	last_lcore_id   = get_last_lcore_id();
	master_lcore_id = rte_get_master_lcore();

	worker_args.ring_in  = rx_to_workers;
	worker_args.ring_out = workers_to_tx;

	/* Start worker_thread() on all the available slave cores but the last 1 */
	for (lcore_id = 0; lcore_id <= get_previous_lcore_id(last_lcore_id); lcore_id++)
		if (rte_lcore_is_enabled(lcore_id) && lcore_id != master_lcore_id)
			rte_eal_remote_launch(worker_thread, (void *)&worker_args,
					lcore_id);
	if (disable_reorder) {
		/* Start tx_thread() on the last slave core */
		rte_eal_remote_launch((lcore_function_t *)tx_thread, workers_to_tx,
				last_lcore_id);
	} else {
		send_args.ring_in = workers_to_tx;
		/* Start send_thread() on the last slave core */
		rte_eal_remote_launch((lcore_function_t *)send_thread,
				(void *)&send_args, last_lcore_id);
	}
	/* Start rx_thread() on the master core */
	rx_thread(rx_to_workers);

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	print_stats();
	return 0;
}