/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <getopt.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_ring.h>
#include <rte_reorder.h>
#define RX_DESC_PER_QUEUE 128
#define TX_DESC_PER_QUEUE 512

#define MAX_PKTS_BURST 32
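/* Size of the rte_reorder window; send_thread() below shows how packets
 * whose sequence number falls outside of it are handled. */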
#define REORDER_BUFFER_SIZE 8192
#define MBUF_PER_POOL 65535
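/* Each mbuf carries its metadata struct and headroom plus 1600 bytes of
 * packet buffer. */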
#define MBUF_SIZE (1600 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define MBUF_POOL_CACHE_SIZE 250
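/* rte_ring sizes must be a power of two; a ring holds at most size - 1
 * entries. */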
#define RING_SIZE 16384
/* uncomment below line to enable debug logs */
/* #define DEBUG */

#ifdef DEBUG
#define LOG_LEVEL RTE_LOG_DEBUG
#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
#else
#define LOG_LEVEL RTE_LOG_INFO
#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
#endif
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_REORDERAPP RTE_LOGTYPE_USER1

unsigned int portmask;
unsigned int disable_reorder;
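/* Set asynchronously by the SIGINT handler; polled by every thread loop so
 * the application can exit cleanly. */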
volatile uint8_t quit_signal;

static struct rte_mempool *mbuf_pool;
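/* Zero-initialized (static storage), i.e. the ports are configured with the
 * default settings. */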
static struct rte_eth_conf port_conf_default;
struct worker_thread_args {
    struct rte_ring *ring_in;
    struct rte_ring *ring_out;
};

struct output_buffer {
    unsigned count;
    struct rte_mbuf *mbufs[MAX_PKTS_BURST];
};
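/* Each stage keeps its counters in a separate cache line (__rte_cache_aligned)
 * so the RX, worker and TX lcores do not false-share them. */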
volatile struct app_stats {
    struct {
        uint64_t rx_pkts;
        uint64_t enqueue_pkts;
        uint64_t enqueue_failed_pkts;
    } rx __rte_cache_aligned;

    struct {
        uint64_t dequeue_pkts;
        uint64_t enqueue_pkts;
        uint64_t enqueue_failed_pkts;
    } wkr __rte_cache_aligned;

    struct {
        uint64_t dequeue_pkts;
        /* Too early pkts transmitted directly w/o reordering */
        uint64_t early_pkts_txtd_woro;
        /* Too early pkts failed from direct transmit */
        uint64_t early_pkts_tx_failed_woro;
        uint64_t ro_tx_pkts;
        uint64_t ro_tx_failed_pkts;
    } tx __rte_cache_aligned;
} app_stats;
/**
 * Get the last enabled lcore ID
 *
 * @return
 *   The last enabled lcore ID.
 */
static unsigned int
get_last_lcore_id(void)
{
    int i;

    for (i = RTE_MAX_LCORE - 1; i >= 0; i--)
        if (rte_lcore_is_enabled(i))
            return i;
    return 0;
}
/**
 * Get the previous enabled lcore ID
 *
 * @param id
 *   The current lcore ID
 * @return
 *   The previous enabled lcore ID or the current lcore
 *   ID if it is the first available core.
 */
static unsigned int
get_previous_lcore_id(unsigned int id)
{
    int i;

    for (i = id - 1; i >= 0; i--)
        if (rte_lcore_is_enabled(i))
            return i;
    return id;
}
static inline void
pktmbuf_free_bulk(struct rte_mbuf *mbuf_table[], unsigned n)
{
    unsigned int i;

    for (i = 0; i < n; i++)
        rte_pktmbuf_free(mbuf_table[i]);
}
/* display usage */
static void
print_usage(const char *prgname)
{
    printf("%s [EAL options] -- -p PORTMASK\n"
            "  -p PORTMASK: hexadecimal bitmask of ports to configure\n",
            prgname);
}
static int
parse_portmask(const char *portmask)
{
    unsigned long pm;
    char *end = NULL;

    /* parse hexadecimal string */
    pm = strtoul(portmask, &end, 16);
    if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
        return 0;

    return pm;
}
/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
    int opt;
    int option_index;
    char **argvopt;
    char *prgname = argv[0];
    static struct option lgopts[] = {
        {"disable-reorder", 0, 0, 0},
        {NULL, 0, 0, 0}
    };

    argvopt = argv;

    while ((opt = getopt_long(argc, argvopt, "p:",
            lgopts, &option_index)) != EOF) {
        switch (opt) {
        /* portmask */
        case 'p':
            portmask = parse_portmask(optarg);
            if (portmask == 0) {
                printf("invalid portmask\n");
                print_usage(prgname);
                return -1;
            }
            break;
        /* long options */
        case 0:
            if (!strcmp(lgopts[option_index].name, "disable-reorder")) {
                printf("reorder disabled\n");
                disable_reorder = 1;
            }
            break;
        default:
            print_usage(prgname);
            return -1;
        }
    }
    if (optind <= 1) {
        print_usage(prgname);
        return -1;
    }

    argv[optind-1] = prgname;
    optind = 0; /* reset getopt lib */
    return 0;
}
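/*
 * Example invocation (EAL options before "--", application options after),
 * assuming the example's usual binary name:
 *   ./packet_ordering -c 0x0f -n 4 -- -p 0x3 --disable-reorder
 */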
/*
 * Initialize a given port using global settings, with RX buffers
 * coming from mbuf_pool.
 */
static inline int
configure_eth_port(uint8_t port_id)
{
    struct ether_addr addr;
    const uint16_t rxRings = 1, txRings = 1;
    const uint8_t nb_ports = rte_eth_dev_count();
    int ret;
    uint16_t q;

    if (port_id >= nb_ports)
        return -1;

    ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default);
    if (ret != 0)
        return ret;

    for (q = 0; q < rxRings; q++) {
        ret = rte_eth_rx_queue_setup(port_id, q, RX_DESC_PER_QUEUE,
                rte_eth_dev_socket_id(port_id), NULL,
                mbuf_pool);
        if (ret < 0)
            return ret;
    }

    for (q = 0; q < txRings; q++) {
        ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE,
                rte_eth_dev_socket_id(port_id), NULL);
        if (ret < 0)
            return ret;
    }

    ret = rte_eth_dev_start(port_id);
    if (ret < 0)
        return ret;

    rte_eth_macaddr_get(port_id, &addr);
    printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
            " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
            (unsigned)port_id,
            addr.addr_bytes[0], addr.addr_bytes[1],
            addr.addr_bytes[2], addr.addr_bytes[3],
            addr.addr_bytes[4], addr.addr_bytes[5]);

    rte_eth_promiscuous_enable(port_id);

    return 0;
}
static void
print_stats(void)
{
    const uint8_t nb_ports = rte_eth_dev_count();
    unsigned int i;
    struct rte_eth_stats eth_stats;

    printf("\nRX thread stats:\n");
    printf(" - Pkts rxd: %"PRIu64"\n",
            app_stats.rx.rx_pkts);
    printf(" - Pkts enqd to workers ring: %"PRIu64"\n",
            app_stats.rx.enqueue_pkts);

    printf("\nWorker thread stats:\n");
    printf(" - Pkts deqd from workers ring: %"PRIu64"\n",
            app_stats.wkr.dequeue_pkts);
    printf(" - Pkts enqd to tx ring: %"PRIu64"\n",
            app_stats.wkr.enqueue_pkts);
    printf(" - Pkts enq to tx failed: %"PRIu64"\n",
            app_stats.wkr.enqueue_failed_pkts);

    printf("\nTX stats:\n");
    printf(" - Pkts deqd from tx ring: %"PRIu64"\n",
            app_stats.tx.dequeue_pkts);
    printf(" - Ro Pkts transmitted: %"PRIu64"\n",
            app_stats.tx.ro_tx_pkts);
    printf(" - Ro Pkts tx failed: %"PRIu64"\n",
            app_stats.tx.ro_tx_failed_pkts);
    printf(" - Pkts transmitted w/o reorder: %"PRIu64"\n",
            app_stats.tx.early_pkts_txtd_woro);
    printf(" - Pkts tx failed w/o reorder: %"PRIu64"\n",
            app_stats.tx.early_pkts_tx_failed_woro);

    for (i = 0; i < nb_ports; i++) {
        rte_eth_stats_get(i, &eth_stats);
        printf("\nPort %u stats:\n", i);
        printf(" - Pkts in:   %"PRIu64"\n", eth_stats.ipackets);
        printf(" - Pkts out:  %"PRIu64"\n", eth_stats.opackets);
        printf(" - In Errs:   %"PRIu64"\n", eth_stats.ierrors);
        printf(" - Out Errs:  %"PRIu64"\n", eth_stats.oerrors);
        printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
    }
}
static void
int_handler(int sig_num)
{
    printf("Exiting on signal %d\n", sig_num);
    quit_signal = 1;
}
/**
 * This thread receives mbufs from the port and assigns them an internal
 * sequence number to keep track of their order of arrival through the
 * mbuf's seqn field.
 * The mbufs are then passed to the worker threads via the rx_to_workers
 * ring.
 */
static int
rx_thread(struct rte_ring *ring_out)
{
    const uint8_t nb_ports = rte_eth_dev_count();
    uint32_t seqn = 0;
    uint16_t i, ret = 0;
    uint16_t nb_rx_pkts;
    uint8_t port_id;
    struct rte_mbuf *pkts[MAX_PKTS_BURST];

    RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
            rte_lcore_id());

    while (!quit_signal) {

        for (port_id = 0; port_id < nb_ports; port_id++) {
            if ((portmask & (1 << port_id)) != 0) {

                /* receive packets */
                nb_rx_pkts = rte_eth_rx_burst(port_id, 0,
                        pkts, MAX_PKTS_BURST);
                if (nb_rx_pkts == 0) {
                    LOG_DEBUG(REORDERAPP,
                            "%s():Received zero packets\n", __func__);
                    continue;
                }
                app_stats.rx.rx_pkts += nb_rx_pkts;

                /* mark sequence number */
                for (i = 0; i < nb_rx_pkts; )
                    pkts[i++]->seqn = seqn++;

                /* enqueue to rx_to_workers ring */
                ret = rte_ring_enqueue_burst(ring_out, (void *) pkts,
                        nb_rx_pkts);
                app_stats.rx.enqueue_pkts += ret;
                if (unlikely(ret < nb_rx_pkts)) {
                    app_stats.rx.enqueue_failed_pkts +=
                            (nb_rx_pkts - ret);
                    pktmbuf_free_bulk(&pkts[ret], nb_rx_pkts - ret);
                }
            }
        }
    }
    return 0;
}
/**
 * This thread takes bursts of packets from the rx_to_workers ring,
 * changes the input port value to an output port value, and feeds
 * the resulting packets to the workers_to_tx ring.
 */
static int
worker_thread(void *args_ptr)
{
    const uint8_t nb_ports = rte_eth_dev_count();
    uint16_t i, ret = 0;
    uint16_t burst_size = 0;
    struct worker_thread_args *args;
    struct rte_mbuf *burst_buffer[MAX_PKTS_BURST] = { NULL };
    struct rte_ring *ring_in, *ring_out;
    const unsigned xor_val = (nb_ports > 1);

    args = (struct worker_thread_args *) args_ptr;
    ring_in = args->ring_in;
    ring_out = args->ring_out;

    RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
            rte_lcore_id());

    while (!quit_signal) {

        /* dequeue the mbufs from rx_to_workers ring */
        burst_size = rte_ring_dequeue_burst(ring_in,
                (void *)burst_buffer, MAX_PKTS_BURST);
        if (unlikely(burst_size == 0))
            continue;

        __sync_fetch_and_add(&app_stats.wkr.dequeue_pkts, burst_size);

        /* just do some operation on mbuf */
        for (i = 0; i < burst_size;)
            burst_buffer[i++]->port ^= xor_val;
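        /*
         * Note: xor_val is 1 only when several ports are in use, so the XOR
         * above forwards traffic between port pairs (0 <-> 1, 2 <-> 3, ...);
         * with a single port it is a no-op. Several workers may run this
         * loop concurrently, hence the atomic statistics updates.
         */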
        /* enqueue the modified mbufs to workers_to_tx ring */
        ret = rte_ring_enqueue_burst(ring_out, (void *)burst_buffer, burst_size);
        __sync_fetch_and_add(&app_stats.wkr.enqueue_pkts, ret);
        if (unlikely(ret < burst_size)) {
            /* Return the mbufs to their respective pool, dropping packets */
            __sync_fetch_and_add(&app_stats.wkr.enqueue_failed_pkts,
                    (int)burst_size - ret);
            pktmbuf_free_bulk(&burst_buffer[ret], burst_size - ret);
        }
    }
    return 0;
}
static inline void
flush_one_port(struct output_buffer *outbuf, uint8_t outp)
{
    unsigned nb_tx = rte_eth_tx_burst(outp, 0, outbuf->mbufs,
            outbuf->count);
    app_stats.tx.ro_tx_pkts += nb_tx;

    if (unlikely(nb_tx < outbuf->count)) {
        /* free the mbufs which failed from transmit */
        app_stats.tx.ro_tx_failed_pkts += (outbuf->count - nb_tx);
        LOG_DEBUG(REORDERAPP, "%s:Packet loss with tx_burst\n", __func__);
        pktmbuf_free_bulk(&outbuf->mbufs[nb_tx], outbuf->count - nb_tx);
    }
    outbuf->count = 0;
}
/**
 * Dequeue mbufs from the workers_to_tx ring and reorder them before
 * transmitting.
 */
static int
send_thread(struct rte_ring *ring_in)
{
    int ret;
    unsigned int i, dret;
    uint16_t nb_dq_mbufs;
    uint8_t outp;
    static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
    struct rte_mbuf *mbufs[MAX_PKTS_BURST];
    struct rte_mbuf *rombufs[MAX_PKTS_BURST] = {NULL};
    struct rte_reorder_buffer *buffer;

    RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
            rte_lcore_id());
    buffer = rte_reorder_create("PKT_RO", rte_socket_id(), REORDER_BUFFER_SIZE);
    while (!quit_signal) {

        /* dequeue the mbufs from workers_to_tx ring */
        nb_dq_mbufs = rte_ring_dequeue_burst(ring_in,
                (void *)mbufs, MAX_PKTS_BURST);

        if (unlikely(nb_dq_mbufs == 0))
            continue;

        app_stats.tx.dequeue_pkts += nb_dq_mbufs;

        for (i = 0; i < nb_dq_mbufs; i++) {
            /* send dequeued mbufs for reordering */
            ret = rte_reorder_insert(buffer, mbufs[i]);
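            /*
             * rte_reorder_insert() fails with rte_errno set to ERANGE when
             * the mbuf's sequence number is too far outside the reorder
             * window to be buffered at all, and to ENOSPC when an early
             * mbuf cannot be accommodated without draining first.
             */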
            if (ret == -1 && rte_errno == ERANGE) {
                /* Too early pkts should be transmitted out directly */
                LOG_DEBUG(REORDERAPP, "%s():Cannot reorder early packet "
                        "direct enqueuing to TX\n", __func__);
                outp = mbufs[i]->port;
                if ((portmask & (1 << outp)) == 0) {
                    rte_pktmbuf_free(mbufs[i]);
                    continue;
                }
                if (rte_eth_tx_burst(outp, 0, &mbufs[i], 1) != 1) {
                    rte_pktmbuf_free(mbufs[i]);
                    app_stats.tx.early_pkts_tx_failed_woro++;
                } else
                    app_stats.tx.early_pkts_txtd_woro++;
            } else if (ret == -1 && rte_errno == ENOSPC) {
                /*
                 * Early pkts just outside of window should be dropped
                 */
                rte_pktmbuf_free(mbufs[i]);
            }
        }

        /*
         * drain MAX_PKTS_BURST of reordered
         * mbufs for transmit
         */
        dret = rte_reorder_drain(buffer, rombufs, MAX_PKTS_BURST);
        for (i = 0; i < dret; i++) {

            struct output_buffer *outbuf;
            uint8_t outp1;

            outp1 = rombufs[i]->port;
            /* skip ports that are not enabled */
            if ((portmask & (1 << outp1)) == 0) {
                rte_pktmbuf_free(rombufs[i]);
                continue;
            }

            outbuf = &tx_buffers[outp1];
            outbuf->mbufs[outbuf->count++] = rombufs[i];
            if (outbuf->count == MAX_PKTS_BURST)
                flush_one_port(outbuf, outp1);
        }
    }
    return 0;
}
/**
 * Dequeue mbufs from the workers_to_tx ring and transmit them.
 */
static int
tx_thread(struct rte_ring *ring_in)
{
    uint32_t i, dqnum = 0;
    uint8_t outp;
    static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
    struct rte_mbuf *mbufs[MAX_PKTS_BURST];
    struct output_buffer *outbuf;

    RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
            rte_lcore_id());
    while (!quit_signal) {

        /* dequeue the mbufs from workers_to_tx ring */
        dqnum = rte_ring_dequeue_burst(ring_in,
                (void *)mbufs, MAX_PKTS_BURST);

        if (unlikely(dqnum == 0))
            continue;

        app_stats.tx.dequeue_pkts += dqnum;

        for (i = 0; i < dqnum; i++) {
            outp = mbufs[i]->port;
            /* skip ports that are not enabled */
            if ((portmask & (1 << outp)) == 0) {
                rte_pktmbuf_free(mbufs[i]);
                continue;
            }

            outbuf = &tx_buffers[outp];
            outbuf->mbufs[outbuf->count++] = mbufs[i];
            if (outbuf->count == MAX_PKTS_BURST)
                flush_one_port(outbuf, outp);
        }
    }
    return 0;
}
int
main(int argc, char **argv)
{
    int ret;
    uint8_t nb_ports;
    uint8_t port_id;
    unsigned int lcore_id, last_lcore_id, master_lcore_id;
    uint8_t nb_ports_available;
    struct worker_thread_args worker_args = {NULL, NULL};
    struct rte_ring *rx_to_workers;
    struct rte_ring *workers_to_tx;

    /* catch ctrl-c so we can print on exit */
    signal(SIGINT, int_handler);

    /* Initialize EAL */
    ret = rte_eal_init(argc, argv);
    if (ret < 0)
        return -1;

    argc -= ret;
    argv += ret;

    /* Parse the application specific arguments */
    ret = parse_args(argc, argv);
    if (ret < 0)
        return -1;

    /* Check if we have enough cores */
    if (rte_lcore_count() < 3)
        rte_exit(EXIT_FAILURE, "Error, This application needs at "
                "least 3 logical cores to run:\n"
                "1 lcore for packet RX\n"
                "1 lcore for packet TX\n"
                "and at least 1 lcore for worker threads\n");
    nb_ports = rte_eth_dev_count();
    if (nb_ports == 0)
        rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
    if (nb_ports != 1 && (nb_ports & 1))
        rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
                "when using a single port\n");
    mbuf_pool = rte_mempool_create("mbuf_pool", MBUF_PER_POOL, MBUF_SIZE,
            MBUF_POOL_CACHE_SIZE,
            sizeof(struct rte_pktmbuf_pool_private),
            rte_pktmbuf_pool_init, NULL,
            rte_pktmbuf_init, NULL,
            rte_socket_id(), 0);
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
    nb_ports_available = nb_ports;

    /* initialize all ports */
    for (port_id = 0; port_id < nb_ports; port_id++) {
        /* skip ports that are not enabled */
        if ((portmask & (1 << port_id)) == 0) {
            printf("\nSkipping disabled port %d\n", port_id);
            nb_ports_available--;
            continue;
        }
        /* init port */
        printf("Initializing port %u... done\n", (unsigned) port_id);

        if (configure_eth_port(port_id) != 0)
            rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
                    port_id);
    }

    if (!nb_ports_available) {
        rte_exit(EXIT_FAILURE,
                "All available ports are disabled. Please set portmask.\n");
    }

    /* Create rings for inter core communication */
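    /*
     * rx_to_workers has a single producer (the RX thread) and several
     * consumers, while workers_to_tx has several producers and a single
     * consumer (the TX/send thread); hence the SP/SC creation flags below.
     */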
    rx_to_workers = rte_ring_create("rx_to_workers", RING_SIZE, rte_socket_id(),
            RING_F_SP_ENQ);
    if (rx_to_workers == NULL)
        rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));

    workers_to_tx = rte_ring_create("workers_to_tx", RING_SIZE, rte_socket_id(),
            RING_F_SC_DEQ);
    if (workers_to_tx == NULL)
        rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
    last_lcore_id = get_last_lcore_id();
    master_lcore_id = rte_get_master_lcore();

    worker_args.ring_in = rx_to_workers;
    worker_args.ring_out = workers_to_tx;

    /* Start worker_thread() on all the available slave cores but the last 1 */
    for (lcore_id = 0; lcore_id <= get_previous_lcore_id(last_lcore_id); lcore_id++)
        if (rte_lcore_is_enabled(lcore_id) && lcore_id != master_lcore_id)
            rte_eal_remote_launch(worker_thread, (void *)&worker_args,
                    lcore_id);

    if (disable_reorder) {
        /* Start tx_thread() on the last slave core */
        rte_eal_remote_launch((lcore_function_t *)tx_thread, workers_to_tx,
                last_lcore_id);
    } else {
        /* Start send_thread() on the last slave core */
        rte_eal_remote_launch((lcore_function_t *)send_thread, workers_to_tx,
                last_lcore_id);
    }

    /* Start rx_thread() on the master core */
    rx_thread(rx_to_workers);

    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        if (rte_eal_wait_lcore(lcore_id) < 0)
            return -1;
    }

    print_stats();
    return 0;
}