/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <signal.h>
#include <getopt.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_ring.h>
#include <rte_reorder.h>
#define RX_DESC_PER_QUEUE 1024
#define TX_DESC_PER_QUEUE 1024

#define MAX_PKTS_BURST 32
#define REORDER_BUFFER_SIZE 8192
#define MBUF_PER_POOL 65535
#define MBUF_POOL_CACHE_SIZE 250

#define RING_SIZE 16384
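
/*
 * Note: MBUF_PER_POOL is 2^16 - 1 because a mempool is most memory-efficient
 * when its size is a power of two minus one; RING_SIZE must be a power of
 * two, as rte_ring_create() requires one unless RING_F_EXACT_SZ is passed.
 */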

/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_REORDERAPP RTE_LOGTYPE_USER1
#define OPT_DISABLE_REORDER "disable-reorder"
#define OPT_INSIGHT_WORKER "insight-worker"

/* long options mapped to short-option values above the ASCII range */
enum {
	OPT_DISABLE_REORDER_NUM = 256,
	OPT_INSIGHT_WORKER_NUM,
};
unsigned int portmask;
unsigned int disable_reorder;
unsigned int insight_worker;
volatile uint8_t quit_signal;

static struct rte_mempool *mbuf_pool;

static struct rte_eth_conf port_conf_default;
struct worker_thread_args {
	struct rte_ring *ring_in;
	struct rte_ring *ring_out;
};

struct send_thread_args {
	struct rte_ring *ring_in;
	struct rte_reorder_buffer *buffer;
};
volatile struct app_stats {
	struct {
		uint64_t rx_pkts;
		uint64_t enqueue_pkts;
		uint64_t enqueue_failed_pkts;
	} rx __rte_cache_aligned;

	struct {
		uint64_t dequeue_pkts;
		uint64_t enqueue_pkts;
		uint64_t enqueue_failed_pkts;
	} wkr __rte_cache_aligned;

	struct {
		uint64_t dequeue_pkts;
		/* Too early pkts transmitted directly w/o reordering */
		uint64_t early_pkts_txtd_woro;
		/* Too early pkts failed from direct transmit */
		uint64_t early_pkts_tx_failed_woro;
		uint64_t ro_tx_pkts;
		uint64_t ro_tx_failed_pkts;
	} tx __rte_cache_aligned;
} app_stats;
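
/*
 * Each pipeline stage above sits in its own cache-line-aligned sub-struct so
 * the RX, worker and TX lcores can update their counters without false
 * sharing of a cache line.
 */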
/* per worker lcore stats */
struct wkr_stats_per {
	uint64_t deq_pkts;
	uint64_t enq_pkts;
	uint64_t enq_failed_pkts;
} __rte_cache_aligned;

static struct wkr_stats_per wkr_stats[RTE_MAX_LCORE] = { {0} };
/**
 * Get the last enabled lcore ID
 *
 * @return
 *   The last enabled lcore ID.
 */
static unsigned int
get_last_lcore_id(void)
{
	int i;

	for (i = RTE_MAX_LCORE - 1; i >= 0; i--)
		if (rte_lcore_is_enabled(i))
			return i;
	return 0;
}
/**
 * Get the previous enabled lcore ID
 *
 * @param id
 *   The current lcore ID
 * @return
 *   The previous enabled lcore ID or the current lcore
 *   ID if it is the first available core.
 */
static unsigned int
get_previous_lcore_id(unsigned int id)
{
	int i;

	for (i = id - 1; i >= 0; i--)
		if (rte_lcore_is_enabled(i))
			return i;
	return id;
}
static inline void
pktmbuf_free_bulk(struct rte_mbuf *mbuf_table[], unsigned n)
{
	unsigned int i;

	for (i = 0; i < n; i++)
		rte_pktmbuf_free(mbuf_table[i]);
}
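
/*
 * Note: DPDK 19.11 and newer provide rte_pktmbuf_free_bulk() in rte_mbuf.h,
 * which could replace this local helper.
 */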
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n",
		prgname);
}
static int
parse_portmask(const char *portmask)
{
	unsigned long pm;
	char *end = NULL;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	return pm;
}
/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt;
	int option_index;
	char **argvopt;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{OPT_DISABLE_REORDER, 0, NULL, OPT_DISABLE_REORDER_NUM},
		{OPT_INSIGHT_WORKER, 0, NULL, OPT_INSIGHT_WORKER_NUM},
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:",
			lgopts, &option_index)) != EOF) {
		switch (opt) {
		case 'p':
			portmask = parse_portmask(optarg);
			if (portmask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;
		case OPT_DISABLE_REORDER_NUM:
			printf("reorder disabled\n");
			disable_reorder = 1;
			break;
		case OPT_INSIGHT_WORKER_NUM:
			printf("print all worker statistics\n");
			insight_worker = 1;
			break;
		default:
			print_usage(prgname);
			return -1;
		}
	}
	if (optind <= 1) {
		print_usage(prgname);
		return -1;
	}

	argv[optind-1] = prgname;
	optind = 1; /* reset getopt lib */
	return 0;
}
/**
 * Tx buffer error callback
 */
static void
flush_tx_error_callback(struct rte_mbuf **unsent, uint16_t count,
		void *userdata __rte_unused) {

	/* free the mbufs which failed from transmit */
	app_stats.tx.ro_tx_failed_pkts += count;
	RTE_LOG_DP(DEBUG, REORDERAPP, "%s:Packet loss with tx_burst\n", __func__);
	pktmbuf_free_bulk(unsent, count);
}
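
/*
 * Registered below via rte_eth_tx_buffer_set_err_callback(); invoked for
 * packets that a buffer flush could not transmit. The library default,
 * rte_eth_tx_buffer_drop_callback(), would free the mbufs silently; this
 * callback also counts the drops in app_stats.
 */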
static inline int
free_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[]) {
	uint16_t port_id;

	/* free the buffers of all enabled ports */
	RTE_ETH_FOREACH_DEV(port_id) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0)
			continue;

		rte_free(tx_buffer[port_id]);
	}
	return 0;
}
static inline int
configure_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[])
{
	uint16_t port_id;
	int ret;

	/* initialize buffers for all ports */
	RTE_ETH_FOREACH_DEV(port_id) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0)
			continue;

		/* Initialize TX buffers */
		tx_buffer[port_id] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKTS_BURST), 0,
				rte_eth_dev_socket_id(port_id));
		if (tx_buffer[port_id] == NULL)
			rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
					port_id);

		rte_eth_tx_buffer_init(tx_buffer[port_id], MAX_PKTS_BURST);

		ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[port_id],
				flush_tx_error_callback, NULL);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot set error callback for tx buffer on port %u\n",
				port_id);
	}
	return 0;
}
static inline int
configure_eth_port(uint16_t port_id)
{
	struct rte_ether_addr addr;
	const uint16_t rxRings = 1, txRings = 1;
	int ret;
	uint16_t q;
	uint16_t nb_rxd = RX_DESC_PER_QUEUE;
	uint16_t nb_txd = TX_DESC_PER_QUEUE;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;
	struct rte_eth_conf port_conf = port_conf_default;

	if (!rte_eth_dev_is_valid_port(port_id))
		return -1;

	ret = rte_eth_dev_info_get(port_id, &dev_info);
	if (ret != 0) {
		printf("Error during getting device (port %u) info: %s\n",
				port_id, strerror(-ret));
		return ret;
	}

	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
	ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf);
	if (ret != 0)
		return ret;

	ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
	if (ret != 0)
		return ret;

	for (q = 0; q < rxRings; q++) {
		ret = rte_eth_rx_queue_setup(port_id, q, nb_rxd,
				rte_eth_dev_socket_id(port_id), NULL,
				mbuf_pool);
		if (ret < 0)
			return ret;
	}

	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	for (q = 0; q < txRings; q++) {
		ret = rte_eth_tx_queue_setup(port_id, q, nb_txd,
				rte_eth_dev_socket_id(port_id), &txconf);
		if (ret < 0)
			return ret;
	}

	ret = rte_eth_dev_start(port_id);
	if (ret < 0)
		return ret;

	ret = rte_eth_macaddr_get(port_id, &addr);
	if (ret != 0) {
		printf("Failed to get MAC address (port %u): %s\n",
				port_id, rte_strerror(-ret));
		return ret;
	}
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			port_id,
			addr.addr_bytes[0], addr.addr_bytes[1],
			addr.addr_bytes[2], addr.addr_bytes[3],
			addr.addr_bytes[4], addr.addr_bytes[5]);

	ret = rte_eth_promiscuous_enable(port_id);
	if (ret != 0)
		return ret;

	return 0;
}
static void
print_stats(void)
{
	uint16_t i;
	struct rte_eth_stats eth_stats;
	unsigned int lcore_id, last_lcore_id, main_lcore_id, end_w_lcore_id;

	last_lcore_id = get_last_lcore_id();
	main_lcore_id = rte_get_main_lcore();
	end_w_lcore_id = get_previous_lcore_id(last_lcore_id);

	printf("\nRX thread stats:\n");
	printf(" - Pkts rxd: %"PRIu64"\n",
			app_stats.rx.rx_pkts);
	printf(" - Pkts enqd to workers ring: %"PRIu64"\n",
			app_stats.rx.enqueue_pkts);

	for (lcore_id = 0; lcore_id <= end_w_lcore_id; lcore_id++) {
		if (insight_worker
				&& rte_lcore_is_enabled(lcore_id)
				&& lcore_id != main_lcore_id) {
			printf("\nWorker thread stats on core [%u]:\n",
					lcore_id);
			printf(" - Pkts deqd from workers ring: %"PRIu64"\n",
					wkr_stats[lcore_id].deq_pkts);
			printf(" - Pkts enqd to tx ring: %"PRIu64"\n",
					wkr_stats[lcore_id].enq_pkts);
			printf(" - Pkts enq to tx failed: %"PRIu64"\n",
					wkr_stats[lcore_id].enq_failed_pkts);
		}

		app_stats.wkr.dequeue_pkts += wkr_stats[lcore_id].deq_pkts;
		app_stats.wkr.enqueue_pkts += wkr_stats[lcore_id].enq_pkts;
		app_stats.wkr.enqueue_failed_pkts +=
				wkr_stats[lcore_id].enq_failed_pkts;
	}

	printf("\nWorker thread stats:\n");
	printf(" - Pkts deqd from workers ring: %"PRIu64"\n",
			app_stats.wkr.dequeue_pkts);
	printf(" - Pkts enqd to tx ring: %"PRIu64"\n",
			app_stats.wkr.enqueue_pkts);
	printf(" - Pkts enq to tx failed: %"PRIu64"\n",
			app_stats.wkr.enqueue_failed_pkts);

	printf("\nTX stats:\n");
	printf(" - Pkts deqd from tx ring: %"PRIu64"\n",
			app_stats.tx.dequeue_pkts);
	printf(" - Ro Pkts transmitted: %"PRIu64"\n",
			app_stats.tx.ro_tx_pkts);
	printf(" - Ro Pkts tx failed: %"PRIu64"\n",
			app_stats.tx.ro_tx_failed_pkts);
	printf(" - Pkts transmitted w/o reorder: %"PRIu64"\n",
			app_stats.tx.early_pkts_txtd_woro);
	printf(" - Pkts tx failed w/o reorder: %"PRIu64"\n",
			app_stats.tx.early_pkts_tx_failed_woro);

	RTE_ETH_FOREACH_DEV(i) {
		rte_eth_stats_get(i, &eth_stats);
		printf("\nPort %u stats:\n", i);
		printf(" - Pkts in: %"PRIu64"\n", eth_stats.ipackets);
		printf(" - Pkts out: %"PRIu64"\n", eth_stats.opackets);
		printf(" - In Errs: %"PRIu64"\n", eth_stats.ierrors);
		printf(" - Out Errs: %"PRIu64"\n", eth_stats.oerrors);
		printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
	}
}
static void
int_handler(int sig_num)
{
	printf("Exiting on signal %d\n", sig_num);
	quit_signal = 1;
}
/**
 * This thread receives mbufs from the port and assigns them an internal
 * sequence number, stored in the mbuf, to keep track of their order of
 * arrival.
 * The mbufs are then passed to the worker threads via the rx_to_workers
 * ring.
 */
static int
rx_thread(struct rte_ring *ring_out)
{
	uint32_t seqn = 0;
	uint16_t i, ret = 0;
	uint16_t nb_rx_pkts;
	uint16_t port_id;
	struct rte_mbuf *pkts[MAX_PKTS_BURST];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	while (!quit_signal) {

		RTE_ETH_FOREACH_DEV(port_id) {
			if ((portmask & (1 << port_id)) != 0) {

				/* receive packets */
				nb_rx_pkts = rte_eth_rx_burst(port_id, 0,
						pkts, MAX_PKTS_BURST);
				if (nb_rx_pkts == 0) {
					RTE_LOG_DP(DEBUG, REORDERAPP,
						"%s():Received zero packets\n", __func__);
					continue;
				}
				app_stats.rx.rx_pkts += nb_rx_pkts;

				/* mark sequence number */
				for (i = 0; i < nb_rx_pkts; )
					*rte_reorder_seqn(pkts[i++]) = seqn++;
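				/*
				 * rte_reorder_seqn() (above) returns a pointer
				 * to a 32-bit sequence number held in a dynamic
				 * mbuf field registered by the reorder library;
				 * older DPDK releases kept this in the
				 * mbuf->seqn field instead.
				 */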
				/* enqueue to rx_to_workers ring */
				ret = rte_ring_enqueue_burst(ring_out,
						(void *)pkts, nb_rx_pkts, NULL);
				app_stats.rx.enqueue_pkts += ret;
				if (unlikely(ret < nb_rx_pkts)) {
					app_stats.rx.enqueue_failed_pkts +=
							(nb_rx_pkts - ret);
					pktmbuf_free_bulk(&pkts[ret], nb_rx_pkts - ret);
				}
			}
		}
	}

	return 0;
}
/**
 * This thread takes bursts of packets from the rx_to_workers ring,
 * changes the input port value to an output port value, and feeds
 * the packets to the workers_to_tx ring.
 */
static int
worker_thread(void *args_ptr)
{
	const uint16_t nb_ports = rte_eth_dev_count_avail();
	uint16_t i, ret = 0;
	uint16_t burst_size = 0;
	struct worker_thread_args *args;
	struct rte_mbuf *burst_buffer[MAX_PKTS_BURST] = { NULL };
	struct rte_ring *ring_in, *ring_out;
	const unsigned xor_val = (nb_ports > 1);
	unsigned int core_id = rte_lcore_id();

	args = (struct worker_thread_args *) args_ptr;
	ring_in = args->ring_in;
	ring_out = args->ring_out;

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			core_id);

	while (!quit_signal) {

		/* dequeue the mbufs from rx_to_workers ring */
		burst_size = rte_ring_dequeue_burst(ring_in,
				(void *)burst_buffer, MAX_PKTS_BURST, NULL);
		if (unlikely(burst_size == 0))
			continue;

		wkr_stats[core_id].deq_pkts += burst_size;

		/* just do some operation on mbuf */
		for (i = 0; i < burst_size;)
			burst_buffer[i++]->port ^= xor_val;
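		/*
		 * With an even number of ports the XOR above pairs them
		 * (0 <-> 1, 2 <-> 3, ...); with a single port xor_val is 0,
		 * so packets leave on the port they arrived on.
		 */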
		/* enqueue the modified mbufs to workers_to_tx ring */
		ret = rte_ring_enqueue_burst(ring_out, (void *)burst_buffer,
				burst_size, NULL);
		wkr_stats[core_id].enq_pkts += ret;
		if (unlikely(ret < burst_size)) {
			/* Return the mbufs to their respective pool, dropping packets */
			wkr_stats[core_id].enq_failed_pkts += burst_size - ret;
			pktmbuf_free_bulk(&burst_buffer[ret], burst_size - ret);
		}
	}
	return 0;
}
/**
 * Dequeue mbufs from the workers_to_tx ring and reorder them before
 * transmitting.
 */
static int
send_thread(struct send_thread_args *args)
{
	int ret;
	unsigned int i, dret;
	uint16_t nb_dq_mbufs;
	uint8_t outp;
	unsigned sent;
	struct rte_mbuf *mbufs[MAX_PKTS_BURST];
	struct rte_mbuf *rombufs[MAX_PKTS_BURST] = {NULL};
	static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__, rte_lcore_id());

	configure_tx_buffers(tx_buffer);

	while (!quit_signal) {

		/* dequeue the mbufs from workers_to_tx ring */
		nb_dq_mbufs = rte_ring_dequeue_burst(args->ring_in,
				(void *)mbufs, MAX_PKTS_BURST, NULL);

		if (unlikely(nb_dq_mbufs == 0))
			continue;

		app_stats.tx.dequeue_pkts += nb_dq_mbufs;

		for (i = 0; i < nb_dq_mbufs; i++) {
			/* send dequeued mbufs for reordering */
			ret = rte_reorder_insert(args->buffer, mbufs[i]);

			if (ret == -1 && rte_errno == ERANGE) {
				/* Too early pkts should be transmitted out directly */
				RTE_LOG_DP(DEBUG, REORDERAPP,
						"%s():Cannot reorder early packet "
						"direct enqueuing to TX\n", __func__);
				outp = mbufs[i]->port;
				if ((portmask & (1 << outp)) == 0) {
					rte_pktmbuf_free(mbufs[i]);
					continue;
				}
				if (rte_eth_tx_burst(outp, 0, &mbufs[i], 1) != 1) {
					rte_pktmbuf_free(mbufs[i]);
					app_stats.tx.early_pkts_tx_failed_woro++;
				} else
					app_stats.tx.early_pkts_txtd_woro++;
			} else if (ret == -1 && rte_errno == ENOSPC) {
				/*
				 * Early pkts just outside of window should be dropped
				 */
				rte_pktmbuf_free(mbufs[i]);
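				/*
				 * Per the rte_reorder_insert() contract: ENOSPC
				 * (here) means this early packet cannot be held
				 * until a drain is performed, while ERANGE
				 * (above) marks a sequence number vastly outside
				 * the expected window.
				 */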
			}
		}

		/*
		 * drain MAX_PKTS_BURST of reordered
		 * mbufs for transmit
		 */
		dret = rte_reorder_drain(args->buffer, rombufs, MAX_PKTS_BURST);
		for (i = 0; i < dret; i++) {

			struct rte_eth_dev_tx_buffer *outbuf;
			uint8_t outp1;

			outp1 = rombufs[i]->port;
			/* skip ports that are not enabled */
			if ((portmask & (1 << outp1)) == 0) {
				rte_pktmbuf_free(rombufs[i]);
				continue;
			}

			outbuf = tx_buffer[outp1];
			sent = rte_eth_tx_buffer(outp1, 0, outbuf, rombufs[i]);
			if (sent)
				app_stats.tx.ro_tx_pkts += sent;
		}
	}

	free_tx_buffers(tx_buffer);

	return 0;
}
/**
 * Dequeue mbufs from the workers_to_tx ring and transmit them.
 */
static int
tx_thread(struct rte_ring *ring_in)
{
	uint32_t i, dqnum;
	uint8_t outp;
	unsigned sent;
	struct rte_mbuf *mbufs[MAX_PKTS_BURST];
	struct rte_eth_dev_tx_buffer *outbuf;
	static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	configure_tx_buffers(tx_buffer);

	while (!quit_signal) {

		/* dequeue the mbufs from workers_to_tx ring */
		dqnum = rte_ring_dequeue_burst(ring_in,
				(void *)mbufs, MAX_PKTS_BURST, NULL);

		if (unlikely(dqnum == 0))
			continue;

		app_stats.tx.dequeue_pkts += dqnum;

		for (i = 0; i < dqnum; i++) {
			outp = mbufs[i]->port;
			/* skip ports that are not enabled */
			if ((portmask & (1 << outp)) == 0) {
				rte_pktmbuf_free(mbufs[i]);
				continue;
			}

			outbuf = tx_buffer[outp];
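			/*
			 * rte_eth_tx_buffer() only queues the mbuf; it returns
			 * the number of packets actually sent, which is
			 * non-zero only when the buffer fills to MAX_PKTS_BURST
			 * and is flushed. Packets a flush fails to send are
			 * handed to flush_tx_error_callback().
			 */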
			sent = rte_eth_tx_buffer(outp, 0, outbuf, mbufs[i]);
			if (sent)
				app_stats.tx.ro_tx_pkts += sent;
		}
	}

	return 0;
}
int
main(int argc, char **argv)
{
	int ret;
	unsigned nb_ports;
	unsigned int lcore_id, last_lcore_id, main_lcore_id;
	uint16_t port_id;
	uint16_t nb_ports_available;
	struct worker_thread_args worker_args = {NULL, NULL};
	struct send_thread_args send_args = {NULL, NULL};
	struct rte_ring *rx_to_workers;
	struct rte_ring *workers_to_tx;

	/* catch ctrl-c so we can print on exit */
	signal(SIGINT, int_handler);

	/* Initialize EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");

	argc -= ret;
	argv += ret;

	/* Parse the application specific arguments */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid packet_ordering arguments\n");

	/* Check if we have enough cores */
	if (rte_lcore_count() < 3)
		rte_exit(EXIT_FAILURE, "Error, This application needs at "
				"least 3 logical cores to run:\n"
				"1 lcore for packet RX\n"
				"1 lcore for packet TX\n"
				"and at least 1 lcore for worker threads\n");

	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
	if (nb_ports != 1 && (nb_ports & 1))
		rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
				"when using a single port\n");

	mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL,
			MBUF_POOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));

	nb_ports_available = nb_ports;

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(port_id) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0) {
			printf("\nSkipping disabled port %d\n", port_id);
			nb_ports_available--;
			continue;
		}
		/* init port */
		printf("Initializing port %u... done\n", port_id);

		if (configure_eth_port(port_id) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
					port_id);
	}

	if (!nb_ports_available) {
		rte_exit(EXIT_FAILURE,
			"All available ports are disabled. Please set portmask.\n");
	}

	/* Create rings for inter core communication */
	rx_to_workers = rte_ring_create("rx_to_workers", RING_SIZE, rte_socket_id(),
			RING_F_SP_ENQ);
	if (rx_to_workers == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));

	workers_to_tx = rte_ring_create("workers_to_tx", RING_SIZE, rte_socket_id(),
			RING_F_SC_DEQ);
	if (workers_to_tx == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
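	/*
	 * rx_to_workers uses RING_F_SP_ENQ (single producer: the RX lcore) and
	 * workers_to_tx uses RING_F_SC_DEQ (single consumer: the TX lcore);
	 * the worker-facing ends of both rings stay multi-producer/consumer.
	 */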
	if (!disable_reorder) {
		send_args.buffer = rte_reorder_create("PKT_RO", rte_socket_id(),
				REORDER_BUFFER_SIZE);
		if (send_args.buffer == NULL)
			rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
	}
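	/*
	 * The reorder buffer tracks a moving window of up to
	 * REORDER_BUFFER_SIZE sequence numbers; packets that fall outside it
	 * take the ERANGE/ENOSPC paths in send_thread().
	 */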
	last_lcore_id = get_last_lcore_id();
	main_lcore_id = rte_get_main_lcore();

	worker_args.ring_in = rx_to_workers;
	worker_args.ring_out = workers_to_tx;

	/* Start worker_thread() on all the available worker cores but the last 1 */
	for (lcore_id = 0; lcore_id <= get_previous_lcore_id(last_lcore_id); lcore_id++)
		if (rte_lcore_is_enabled(lcore_id) && lcore_id != main_lcore_id)
			rte_eal_remote_launch(worker_thread, (void *)&worker_args,
					lcore_id);

	if (disable_reorder) {
		/* Start tx_thread() on the last worker core */
		rte_eal_remote_launch((lcore_function_t *)tx_thread, workers_to_tx,
				last_lcore_id);
	} else {
		send_args.ring_in = workers_to_tx;
		/* Start send_thread() on the last worker core */
		rte_eal_remote_launch((lcore_function_t *)send_thread,
				(void *)&send_args, last_lcore_id);
	}

	/* Start rx_thread() on the main core */
	rx_thread(rx_to_workers);

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	print_stats();

	/* clean up the EAL */
	rte_eal_cleanup();

	return 0;
}